| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 import time | 4 import time |
| 5 | 5 |
| 6 from profile_creators import profile_extender | 6 from profile_creators import profile_extender |
| 7 from telemetry.core import exceptions | 7 from telemetry.core import exceptions |
| 8 from telemetry.core import util |
| 8 | 9 |
| 9 | 10 |
| 10 class FastNavigationProfileExtender(profile_extender.ProfileExtender): | 11 class FastNavigationProfileExtender(profile_extender.ProfileExtender): |
| 11 """Extends a Chrome profile. | 12 """Extends a Chrome profile. |
| 12 | 13 |
| 13 This class creates or extends an existing profile by performing a set of tab | 14 This class creates or extends an existing profile by performing a set of tab |
| 14 navigations in large batches. This is accomplished by opening a large number | 15 navigations in large batches. This is accomplished by opening a large number |
| 15 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 16 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
| 16 tabs to load. This provides two benefits: | 17 tabs to load. This provides two benefits: |
| 17 - Takes advantage of the high number of logical cores on modern CPUs. | 18 - Takes advantage of the high number of logical cores on modern CPUs. |
| (...skipping 11 matching lines...) |
| 29 super(FastNavigationProfileExtender, self).__init__(finder_options) | 30 super(FastNavigationProfileExtender, self).__init__(finder_options) |
| 30 | 31 |
| 31 # The instance keeps a list of Tabs that can be navigated successfully. | 32 # The instance keeps a list of Tabs that can be navigated successfully. |
| 32 # This means that the Tab is not crashed, and is processing JavaScript in a | 33 # This means that the Tab is not crashed, and is processing JavaScript in a |
| 33 # timely fashion. | 34 # timely fashion. |
| 34 self._navigation_tabs = [] | 35 self._navigation_tabs = [] |
| 35 | 36 |
| 36 # The number of tabs to use. | 37 # The number of tabs to use. |
| 37 self._NUM_TABS = maximum_batch_size | 38 self._NUM_TABS = maximum_batch_size |
| 38 | 39 |
| 39 # The amount of time to wait for a batch of pages to finish loading. | 40 # The amount of additional time to wait for a batch of pages to finish |
| 40 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 41 # loading for each page in the batch. |
| 42 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20 |
| 41 | 43 |
| 42 # The default amount of time to wait for the retrieval of the URL of a tab. | 44 # The amount of time to wait for a page to quiesce. Some pages will never |
| 43 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 45 # quiesce. |
| 46 self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10 |
| 44 | 47 |
| 45 def Run(self): | 48 def Run(self): |
| 46 """Superclass override.""" | 49 """Superclass override.""" |
| 47 try: | 50 try: |
| 48 self.SetUpBrowser() | 51 self.SetUpBrowser() |
| 49 self._PerformNavigations() | 52 self._PerformNavigations() |
| 50 finally: | 53 finally: |
| 51 self.TearDownBrowser() | 54 self.TearDownBrowser() |
| 52 | 55 |
| 56 # When there hasn't been an exception, verify that the profile was |
| 57 # correctly extended. |
| 58 # TODO(erikchen): I've intentionally omitted my implementation of |
| 59 # VerifyProfileWasExtended() in small_profile_extender, since the profile |
| 60 # is not being correctly extended. http://crbug.com/484833 |
| 61 # http://crbug.com/484880 |
| 62 self.VerifyProfileWasExtended() |
| 63 |
| 64 def VerifyProfileWasExtended(self): |
| 65 """Verifies that the profile was correctly extended. |
| 66 |
| 67 Can be overridden by subclasses. |
| 68 """ |
| 69 pass |
| 70 |
| 53 def GetUrlIterator(self): | 71 def GetUrlIterator(self): |
| 54 """Gets URLs for the browser to navigate to. | 72 """Gets URLs for the browser to navigate to. |
| 55 | 73 |
| 56 Intended for subclass override. | 74 Intended for subclass override. |
| 57 | 75 |
| 58 Returns: | 76 Returns: |
| 59 An iterator whose elements are urls to be navigated to. | 77 An iterator whose elements are urls to be navigated to. |
| 60 """ | 78 """ |
| 61 raise NotImplementedError() | 79 raise NotImplementedError() |
| 62 | 80 |
| 63 def ShouldExitAfterBatchNavigation(self): | 81 def ShouldExitAfterBatchNavigation(self): |
| 64 """Returns a boolean indicating whether profile extension is finished. | 82 """Returns a boolean indicating whether profile extension is finished. |
| 65 | 83 |
| 66 Intended for subclass override. | 84 Intended for subclass override. |
| 67 """ | 85 """ |
| 68 raise NotImplementedError() | 86 raise NotImplementedError() |
| 69 | 87 |
| 70 def CleanUpAfterBatchNavigation(self): | 88 def CleanUpAfterBatchNavigation(self): |
| 71 """A hook for subclasses to perform cleanup after each batch of | 89 """A hook for subclasses to perform cleanup after each batch of |
| 72 navigations. | 90 navigations. |
| 73 | 91 |
| 74 Can be overridden by subclasses. | 92 Can be overridden by subclasses. |
| 75 """ | 93 """ |
| 76 pass | 94 pass |
| 77 | 95 |
| 78 def _AddNewTab(self): | |
| 79 """Adds a new tab to the browser.""" | |
| 80 | |
| 81 # Adding a new tab requires making a request over devtools. This can fail | |
| 82 # for a variety of reasons. Retry 3 times. | |
| 83 retry_count = 3 | |
| 84 for i in range(retry_count): | |
| 85 try: | |
| 86 self._navigation_tabs.append(self._browser.tabs.New()) | |
| 87 except exceptions.Error: | |
| 88 if i == retry_count - 1: | |
| 89 raise | |
| 90 else: | |
| 91 break | |
| 92 | |
| 93 def _RefreshNavigationTabs(self): | 96 def _RefreshNavigationTabs(self): |
| 94 """Updates the member self._navigation_tabs to contain self._NUM_TABS | 97 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
| 95 elements, each of which is not crashed. The crashed tabs are intentionally | 98 elements, each of which is not crashed. The crashed tabs are intentionally |
| 96 leaked, since Telemetry doesn't have a good way of killing crashed tabs. | 99 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
| 97 | 100 |
| 98 It is also possible for a tab to be stalled in an infinite JavaScript loop. | 101 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
| 99 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. | 102 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. |
| 100 There is no way to kill these tabs, so they are also leaked. This method is | 103 There is no way to kill these tabs, so they are also leaked. This method is |
| 101 careful to only use tabs in self._navigation_tabs, or newly created tabs. | 104 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
| 102 """ | 105 """ |
| 103 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] | 106 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
| 104 self._navigation_tabs = live_tabs | 107 self._navigation_tabs = live_tabs |
| 105 | 108 |
| 106 while len(self._navigation_tabs) < self._NUM_TABS: | 109 while len(self._navigation_tabs) < self._NUM_TABS: |
| 107 self._AddNewTab() | 110 self._navigation_tabs.append(self._browser.tabs.New()) |
| 108 | 111 |
| 109 def _RemoveNavigationTab(self, tab): | 112 def _RemoveNavigationTab(self, tab): |
| 110 """Removes a tab which is no longer in a useable state from | 113 """Removes a tab which is no longer in a useable state from |
| 111 self._navigation_tabs. The tab is not removed from self.browser.tabs, | 114 self._navigation_tabs. The tab is not removed from self.browser.tabs, |
| 112 since there is no guarantee that the tab can be safely removed.""" | 115 since there is no guarantee that the tab can be safely removed.""" |
| 113 self._navigation_tabs.remove(tab) | 116 self._navigation_tabs.remove(tab) |
| 114 | 117 |
| 115 def _RetrieveTabUrl(self, tab, timeout): | 118 def _RetrieveTabUrl(self, tab, timeout): |
| 116 """Retrieves the URL of the tab.""" | 119 """Retrieves the URL of the tab.""" |
| 117 try: | 120 # TODO(erikchen): Use tab.url instead, which talks to the browser process |
| 118 return tab.EvaluateJavaScript('document.URL', timeout) | 121 # instead of the renderer process. http://crbug.com/486119 |
| 119 except exceptions.Error: | 122 return tab.EvaluateJavaScript('document.URL', timeout) |
| 120 return None | |
| 121 | 123 |
| 122 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 124 def _WaitForUrlToChange(self, tab, initial_url, end_time): |
| 123 """Waits for the tab to navigate away from its initial url.""" | 125 """Waits for the tab to navigate away from its initial url. |
| 124 end_time = time.time() + timeout | 126 |
| 127 If time.time() is larger than end_time, the function does nothing. |
| 128 Otherwise, the function tries to return no later than end_time. |
| 129 """ |
| 125 while True: | 130 while True: |
| 126 seconds_to_wait = end_time - time.time() | 131 seconds_to_wait = end_time - time.time() |
| 127 seconds_to_wait = max(0, seconds_to_wait) | 132 if seconds_to_wait <= 0: |
| 128 | |
| 129 if seconds_to_wait == 0: | |
| 130 break | 133 break |
| 131 | 134 |
| 132 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) | 135 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) |
| 133 if current_url != initial_url: | 136 if current_url != initial_url and current_url != "": |
| 134 break | 137 break |
| 135 | 138 |
| 136 # Retrieving the current url is a non-trivial operation. Add a small | 139 # Retrieving the current url is a non-trivial operation. Add a small |
| 137 # sleep here to prevent this method from contending with the actual | 140 # sleep here to prevent this method from contending with the actual |
| 138 # navigation. | 141 # navigation. |
| 139 time.sleep(0.01) | 142 time.sleep(0.01) |
| 140 | 143 |
| 144 def _WaitForTabToBeReady(self, tab, end_time): |
| 145 """Waits for the tab to be ready. |
| 146 |
| 147 If time.time() is larger than end_time, the function does nothing. |
| 148 Otherwise, the function tries to return no later than end_time. |
| 149 """ |
| 150 seconds_to_wait = end_time - time.time() |
| 151 if seconds_to_wait <= 0: |
| 152 return |
| 153 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) |
| 154 |
| 155 # Wait up to 10 seconds for the page to quiesce. If the page hasn't |
| 156 # quiesced in 10 seconds, it will probably never quiesce. |
| 157 seconds_to_wait = end_time - time.time() |
| 158 seconds_to_wait = max(0, seconds_to_wait) |
| 159 try: |
| 160 util.WaitFor(tab.HasReachedQuiescence, seconds_to_wait) |
| 161 except exceptions.TimeoutException: |
| 162 pass |
| 163 |
| 141 def _BatchNavigateTabs(self, batch): | 164 def _BatchNavigateTabs(self, batch): |
| 142 """Performs a batch of tab navigations with minimal delay. | 165 """Performs a batch of tab navigations with minimal delay. |
| 143 | 166 |
| 144 Args: | 167 Args: |
| 145 batch: A list of tuples (tab, url). | 168 batch: A list of tuples (tab, url). |
| 146 | 169 |
| 147 Returns: | 170 Returns: |
| 148 A list of tuples (tab, initial_url). |initial_url| is the url of the | 171 A list of tuples (tab, initial_url). |initial_url| is the url of the |
| 149 |tab| prior to a navigation command being sent to it. | 172 |tab| prior to a navigation command being sent to it. |
| 150 """ | 173 """ |
| 151 timeout_in_seconds = 0 | 174 # Attempting to pass in a timeout of 0 seconds results in a synchronous |
| 175 # socket error from the websocket library. Pass in a very small timeout |
| 176 # instead so that the websocket library raises a Timeout exception. This |
| 177 # prevents the logic from accidentally catching different socket |
| 178 # exceptions. |
| 179 timeout_in_seconds = 0.01 |
| 152 | 180 |
| 153 queued_tabs = [] | 181 queued_tabs = [] |
| 154 for tab, url in batch: | 182 for tab, url in batch: |
| 155 initial_url = self._RetrieveTabUrl(tab, | 183 initial_url = self._RetrieveTabUrl(tab, 20) |
| 156 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | |
| 157 | |
| 158 try: | 184 try: |
| 159 tab.Navigate(url, None, timeout_in_seconds) | 185 tab.Navigate(url, None, timeout_in_seconds) |
| 160 except exceptions.Error: | 186 except exceptions.TimeoutException: |
| 161 # We expect a time out. It's possible for other problems to arise, but | 187 # We expect to receive a timeout exception, since we're not waiting for |
| 162 # this method is not responsible for dealing with them. Ignore all | 188 # the navigation to complete. |
| 163 # exceptions. | |
| 164 pass | 189 pass |
| 165 | |
| 166 queued_tabs.append((tab, initial_url)) | 190 queued_tabs.append((tab, initial_url)) |
| 167 return queued_tabs | 191 return queued_tabs |
| 168 | 192 |
| 169 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 193 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
| 170 """Waits for all the batch navigated tabs to finish loading. | 194 """Waits for all the batch navigated tabs to finish loading. |
| 171 | 195 |
| 172 Args: | 196 Args: |
| 173 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 197 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
| 174 to have already been sent a navigation command. | 198 to have already been sent a navigation command. |
| 175 """ | 199 """ |
| 176 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 200 total_batch_timeout = (len(queued_tabs) * |
| 201 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS) |
| 202 end_time = time.time() + total_batch_timeout |
| 177 for tab, initial_url in queued_tabs: | 203 for tab, initial_url in queued_tabs: |
| 178 seconds_to_wait = end_time - time.time() | 204 # Since we didn't wait any time for the tab url navigation to commit, it's |
| 179 seconds_to_wait = max(0, seconds_to_wait) | |
| 180 | |
| 181 if seconds_to_wait == 0: | |
| 182 break | |
| 183 | |
| 184 # Since we don't wait any time for the tab url navigation to commit, it's | |
| 185 # possible that the tab hasn't started navigating yet. | 205 # possible that the tab hasn't started navigating yet. |
| 186 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | 206 self._WaitForUrlToChange(tab, initial_url, end_time) |
| 187 | 207 self._WaitForTabToBeReady(tab, end_time) |
| 188 seconds_to_wait = end_time - time.time() | |
| 189 seconds_to_wait = max(0, seconds_to_wait) | |
| 190 | |
| 191 try: | |
| 192 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | |
| 193 except exceptions.TimeoutException: | |
| 194 # Ignore time outs. | |
| 195 pass | |
| 196 except exceptions.Error: | |
| 197 # If any error occurs, remove the tab. it's probably in an | |
| 198 # unrecoverable state. | |
| 199 self._RemoveNavigationTab(tab) | |
| 200 | 208 |
| 201 def _GetUrlsToNavigate(self, url_iterator): | 209 def _GetUrlsToNavigate(self, url_iterator): |
| 202 """Returns an array of urls to navigate to, given a url_iterator.""" | 210 """Returns an array of urls to navigate to, given a url_iterator.""" |
| 203 urls = [] | 211 urls = [] |
| 204 for _ in xrange(self._NUM_TABS): | 212 for _ in xrange(self._NUM_TABS): |
| 205 try: | 213 try: |
| 206 urls.append(url_iterator.next()) | 214 urls.append(url_iterator.next()) |
| 207 except StopIteration: | 215 except StopIteration: |
| 208 break | 216 break |
| 209 return urls | 217 return urls |
| (...skipping 17 matching lines...) |
| 227 tab = self._navigation_tabs[i] | 235 tab = self._navigation_tabs[i] |
| 228 batch.append((tab, url)) | 236 batch.append((tab, url)) |
| 229 | 237 |
| 230 queued_tabs = self._BatchNavigateTabs(batch) | 238 queued_tabs = self._BatchNavigateTabs(batch) |
| 231 self._WaitForQueuedTabsToLoad(queued_tabs) | 239 self._WaitForQueuedTabsToLoad(queued_tabs) |
| 232 | 240 |
| 233 self.CleanUpAfterBatchNavigation() | 241 self.CleanUpAfterBatchNavigation() |
| 234 | 242 |
| 235 if self.ShouldExitAfterBatchNavigation(): | 243 if self.ShouldExitAfterBatchNavigation(): |
| 236 break | 244 break |
| OLD | NEW |