Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 import time | 4 import time |
| 5 | 5 |
| 6 from profile_creators import profile_extender | 6 from profile_creators import profile_extender |
| 7 from telemetry.core import exceptions | 7 from telemetry.core import exceptions |
| 8 from telemetry.core import util | |
| 8 | 9 |
| 9 | 10 |
| 10 class FastNavigationProfileExtender(profile_extender.ProfileExtender): | 11 class FastNavigationProfileExtender(profile_extender.ProfileExtender): |
| 11 """Extends a Chrome profile. | 12 """Extends a Chrome profile. |
| 12 | 13 |
| 13 This class creates or extends an existing profile by performing a set of tab | 14 This class creates or extends an existing profile by performing a set of tab |
| 14 navigations in large batches. This is accomplished by opening a large number | 15 navigations in large batches. This is accomplished by opening a large number |
| 15 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 16 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
| 16 tabs to load. This provides two benefits: | 17 tabs to load. This provides two benefits: |
| 17 - Takes advantage of the high number of logical cores on modern CPUs. | 18 - Takes advantage of the high number of logical cores on modern CPUs. |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 29 super(FastNavigationProfileExtender, self).__init__(finder_options) | 30 super(FastNavigationProfileExtender, self).__init__(finder_options) |
| 30 | 31 |
| 31 # The instance keeps a list of Tabs that can be navigated successfully. | 32 # The instance keeps a list of Tabs that can be navigated successfully. |
| 32 # This means that the Tab is not crashed, and is processing JavaScript in a | 33 # This means that the Tab is not crashed, and is processing JavaScript in a |
| 33 # timely fashion. | 34 # timely fashion. |
| 34 self._navigation_tabs = [] | 35 self._navigation_tabs = [] |
| 35 | 36 |
| 36 # The number of tabs to use. | 37 # The number of tabs to use. |
| 37 self._NUM_TABS = maximum_batch_size | 38 self._NUM_TABS = maximum_batch_size |
| 38 | 39 |
| 39 # The amount of time to wait for a batch of pages to finish loading. | 40 # The amount of additional time to wait for a batch of pages to finish |
| 40 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 41 # loading for each page in the batch. |
| 42 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20 | |
| 41 | 43 |
| 42 # The default amount of time to wait for the retrieval of the URL of a tab. | 44 # The amount of time to wait for a page to quiesce. Some pages will never |
| 43 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 45 # quiesce. |
| 46 self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10 | |
| 44 | 47 |
| 45 def Run(self): | 48 def Run(self): |
| 46 """Superclass override.""" | 49 """Superclass override.""" |
| 47 try: | 50 try: |
| 48 self.SetUpBrowser() | 51 self.SetUpBrowser() |
| 49 self._PerformNavigations() | 52 self._PerformNavigations() |
| 50 finally: | 53 finally: |
| 51 self.TearDownBrowser() | 54 self.TearDownBrowser() |
| 55 self.VerifyProfileWasExtended() | |
|
erikchen
2015/05/05 22:16:45
I'm intentionally omitting my implementation of Ve
nednguyen
2015/05/07 22:39:20
These notes to the code reviewers should also be p
erikchen
2015/05/08 19:47:58
Done.
| |
| 52 | 56 |
| 53 def GetUrlIterator(self): | 57 def GetUrlIterator(self): |
| 54 """Gets URLs for the browser to navigate to. | 58 """Gets URLs for the browser to navigate to. |
| 55 | 59 |
| 56 Intended for subclass override. | 60 Intended for subclass override. |
| 57 | 61 |
| 58 Returns: | 62 Returns: |
| 59 An iterator whose elements are urls to be navigated to. | 63 An iterator whose elements are urls to be navigated to. |
| 60 """ | 64 """ |
| 61 raise NotImplementedError() | 65 raise NotImplementedError() |
| 62 | 66 |
| 63 def ShouldExitAfterBatchNavigation(self): | 67 def ShouldExitAfterBatchNavigation(self): |
| 64 """Returns a boolean indicating whether profile extension is finished. | 68 """Returns a boolean indicating whether profile extension is finished. |
| 65 | 69 |
| 66 Intended for subclass override. | 70 Intended for subclass override. |
| 67 """ | 71 """ |
| 68 raise NotImplementedError() | 72 raise NotImplementedError() |
| 69 | 73 |
| 70 def CleanUpAfterBatchNavigation(self): | 74 def CleanUpAfterBatchNavigation(self): |
| 71 """A hook for subclasses to perform cleanup after each batch of | 75 """A hook for subclasses to perform cleanup after each batch of |
| 72 navigations. | 76 navigations. |
| 73 | 77 |
| 74 Can be overridden by subclasses. | 78 Can be overridden by subclasses. |
| 75 """ | 79 """ |
| 76 pass | 80 pass |
| 77 | 81 |
| 78 def _AddNewTab(self): | 82 def _AddNewTab(self): |
| 79 """Adds a new tab to the browser.""" | 83 """Adds a new tab to the browser.""" |
| 80 | 84 self._navigation_tabs.append(self._browser.tabs.New()) |
| 81 # Adding a new tab requires making a request over devtools. This can fail | |
| 82 # for a variety of reasons. Retry 3 times. | |
| 83 retry_count = 3 | |
| 84 for i in range(retry_count): | |
| 85 try: | |
| 86 self._navigation_tabs.append(self._browser.tabs.New()) | |
| 87 except exceptions.Error: | |
| 88 if i == retry_count - 1: | |
| 89 raise | |
| 90 else: | |
| 91 break | |
| 92 | 85 |
| 93 def _RefreshNavigationTabs(self): | 86 def _RefreshNavigationTabs(self): |
| 94 """Updates the member self._navigation_tabs to contain self._NUM_TABS | 87 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
| 95 elements, each of which is not crashed. The crashed tabs are intentionally | 88 elements, each of which is not crashed. The crashed tabs are intentionally |
| 96 leaked, since Telemetry doesn't have a good way of killing crashed tabs. | 89 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
|
dtu
2015/05/07 21:50:36
I have a question. What happens if you call tab.Cl
erikchen
2015/05/08 19:47:58
The exception: "DevtoolsTargetCrashException: Devt
| |
| 97 | 90 |
| 98 It is also possible for a tab to be stalled in an infinite JavaScript loop. | 91 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
| 99 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. | 92 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. |
| 100 There is no way to kill these tabs, so they are also leaked. This method is | 93 There is no way to kill these tabs, so they are also leaked. This method is |
| 101 careful to only use tabs in self._navigation_tabs, or newly created tabs. | 94 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
| 102 """ | 95 """ |
| 103 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] | 96 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
| 104 self._navigation_tabs = live_tabs | 97 self._navigation_tabs = live_tabs |
| 105 | 98 |
| 106 while len(self._navigation_tabs) < self._NUM_TABS: | 99 while len(self._navigation_tabs) < self._NUM_TABS: |
| 107 self._AddNewTab() | 100 self._AddNewTab() |
| 108 | 101 |
| 109 def _RemoveNavigationTab(self, tab): | 102 def _RemoveNavigationTab(self, tab): |
| 110 """Removes a tab which is no longer in a useable state from | 103 """Removes a tab which is no longer in a useable state from |
| 111 self._navigation_tabs. The tab is not removed from self.browser.tabs, | 104 self._navigation_tabs. The tab is not removed from self.browser.tabs, |
| 112 since there is no guarantee that the tab can be safely removed.""" | 105 since there is no guarantee that the tab can be safely removed.""" |
| 113 self._navigation_tabs.remove(tab) | 106 self._navigation_tabs.remove(tab) |
| 114 | 107 |
| 115 def _RetrieveTabUrl(self, tab, timeout): | 108 def _RetrieveTabUrl(self, tab, timeout): |
| 116 """Retrieves the URL of the tab.""" | 109 """Retrieves the URL of the tab.""" |
| 117 try: | 110 return tab.EvaluateJavaScript('document.URL', timeout) |
|
dtu
2015/05/07 21:50:36
Does tab.url not work in this case? (tab.url uses
erikchen
2015/05/08 19:47:58
That's a good suggestion - I've added a comment he
| |
| 118 return tab.EvaluateJavaScript('document.URL', timeout) | |
| 119 except exceptions.Error: | |
| 120 return None | |
| 121 | 111 |
| 122 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 112 def _WaitForUrlToChange(self, tab, initial_url, end_time): |
| 123 """Waits for the tab to navigate away from its initial url.""" | 113 """Waits for the tab to navigate away from its initial url. |
| 124 end_time = time.time() + timeout | 114 |
| 115 If time.time() is larger than end_time, the function does nothing. | |
| 116 Otherwise, the function tries to return no later than end_time. | |
| 117 """ | |
| 125 while True: | 118 while True: |
| 126 seconds_to_wait = end_time - time.time() | 119 seconds_to_wait = end_time - time.time() |
| 127 seconds_to_wait = max(0, seconds_to_wait) | 120 seconds_to_wait = max(0, seconds_to_wait) |
| 128 | 121 |
| 129 if seconds_to_wait == 0: | 122 if seconds_to_wait == 0: |
| 130 break | 123 break |
| 131 | 124 |
| 132 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) | 125 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) |
| 133 if current_url != initial_url: | 126 if current_url != initial_url and current_url != "": |
| 134 break | 127 break |
| 135 | 128 |
| 136 # Retrieving the current url is a non-trivial operation. Add a small | 129 # Retrieving the current url is a non-trivial operation. Add a small |
| 137 # sleep here to prevent this method from contending with the actual | 130 # sleep here to prevent this method from contending with the actual |
| 138 # navigation. | 131 # navigation. |
| 139 time.sleep(0.01) | 132 time.sleep(0.01) |
| 140 | 133 |
| 134 def _WaitForTabToBeReady(self, tab, end_time): | |
| 135 """Waits for the tab to be ready. | |
| 136 | |
| 137 If time.time() is larger than end_time, the function does nothing. | |
| 138 Otherwise, the function tries to return no later than end_time. | |
| 139 """ | |
| 140 seconds_to_wait = end_time - time.time() | |
| 141 seconds_to_wait = max(0, seconds_to_wait) | |
| 142 if seconds_to_wait == 0: | |
|
dtu
2015/05/07 21:50:36
Instead of doing max(), why not just check seconds
erikchen
2015/05/08 19:47:58
Done.
| |
| 143 return | |
| 144 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | |
| 145 | |
| 141 def _BatchNavigateTabs(self, batch): | 146 def _BatchNavigateTabs(self, batch): |
| 142 """Performs a batch of tab navigations with minimal delay. | 147 """Performs a batch of tab navigations with minimal delay. |
| 143 | 148 |
| 144 Args: | 149 Args: |
| 145 batch: A list of tuples (tab, url). | 150 batch: A list of tuples (tab, url). |
| 146 | 151 |
| 147 Returns: | 152 Returns: |
| 148 A list of tuples (tab, initial_url). |initial_url| is the url of the | 153 A list of tuples (tab, initial_url). |initial_url| is the url of the |
| 149 |tab| prior to a navigation command being sent to it. | 154 |tab| prior to a navigation command being sent to it. |
| 150 """ | 155 """ |
| 151 timeout_in_seconds = 0 | 156 # Attempting to pass in a timeout of 0 seconds results in a synchronous |
| 157 # socket error from the websocket library. Pass in a very small timeout | |
| 158 # instead so that the websocket library raises a Timeout exception. This | |
| 159 # prevents the logic from accidentally catching different socket | |
| 160 # exceptions. | |
| 161 timeout_in_seconds = 0.01 | |
| 152 | 162 |
| 153 queued_tabs = [] | 163 queued_tabs = [] |
| 154 for tab, url in batch: | 164 for tab, url in batch: |
| 155 initial_url = self._RetrieveTabUrl(tab, | 165 initial_url = self._RetrieveTabUrl(tab, 20) |
| 156 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | |
| 157 | |
| 158 try: | 166 try: |
| 159 tab.Navigate(url, None, timeout_in_seconds) | 167 tab.Navigate(url, None, timeout_in_seconds) |
| 160 except exceptions.Error: | 168 except exceptions.TimeoutException: |
| 161 # We expect a time out. It's possible for other problems to arise, but | 169 # We expect to receive a timeout exception, since we're not waiting for |
| 162 # this method is not responsible for dealing with them. Ignore all | 170 # the navigation to complete. |
| 163 # exceptions. | |
| 164 pass | 171 pass |
| 165 | |
| 166 queued_tabs.append((tab, initial_url)) | 172 queued_tabs.append((tab, initial_url)) |
| 167 return queued_tabs | 173 return queued_tabs |
| 168 | 174 |
| 169 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 175 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
| 170 """Waits for all the batch navigated tabs to finish loading. | 176 """Waits for all the batch navigated tabs to finish loading. |
| 171 | 177 |
| 172 Args: | 178 Args: |
| 173 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 179 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
| 174 to have already been sent a navigation command. | 180 to have already been sent a navigation command. |
| 175 """ | 181 """ |
| 176 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 182 total_batch_timeout = (len(queued_tabs) * |
| 183 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS) | |
| 184 end_time = time.time() + total_batch_timeout | |
| 177 for tab, initial_url in queued_tabs: | 185 for tab, initial_url in queued_tabs: |
| 186 # Since we didn't wait any time for the tab url navigation to commit, it's | |
| 187 # possible that the tab hasn't started navigating yet. | |
| 188 self._WaitForUrlToChange(tab, initial_url, end_time) | |
| 189 self._WaitForTabToBeReady(tab, end_time) | |
| 190 | |
| 191 # Wait up to 10 seconds for the page to quiesce. If the page hasn't | |
| 192 # quiesced in 10 seconds, it will probably never quiesce. | |
| 178 seconds_to_wait = end_time - time.time() | 193 seconds_to_wait = end_time - time.time() |
| 179 seconds_to_wait = max(0, seconds_to_wait) | 194 seconds_to_wait = max(0, seconds_to_wait) |
| 180 | 195 seconds_to_wait = min(self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS, |
| 181 if seconds_to_wait == 0: | 196 seconds_to_wait) |
| 182 break | |
| 183 | |
| 184 # Since we don't wait any time for the tab url navigation to commit, it's | |
| 185 # possible that the tab hasn't started navigating yet. | |
| 186 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | |
| 187 | |
| 188 seconds_to_wait = end_time - time.time() | |
| 189 seconds_to_wait = max(0, seconds_to_wait) | |
| 190 | |
| 191 try: | 197 try: |
| 192 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | 198 util.WaitFor(lambda tab=tab: tab.HasReachedQuiescence(), |
|
dtu
2015/05/07 21:50:36
Just lambda: tab.HasReachedQuiescence()
erikchen
2015/05/08 19:47:58
Actually, there's no need for the lambda either.
| |
| 199 seconds_to_wait) | |
| 193 except exceptions.TimeoutException: | 200 except exceptions.TimeoutException: |
| 194 # Ignore time outs. | |
| 195 pass | 201 pass |
| 196 except exceptions.Error: | |
| 197 # If any error occurs, remove the tab. it's probably in an | |
| 198 # unrecoverable state. | |
| 199 self._RemoveNavigationTab(tab) | |
| 200 | 202 |
| 201 def _GetUrlsToNavigate(self, url_iterator): | 203 def _GetUrlsToNavigate(self, url_iterator): |
| 202 """Returns an array of urls to navigate to, given a url_iterator.""" | 204 """Returns an array of urls to navigate to, given a url_iterator.""" |
| 203 urls = [] | 205 urls = [] |
| 204 for _ in xrange(self._NUM_TABS): | 206 for _ in xrange(self._NUM_TABS): |
| 205 try: | 207 try: |
| 206 urls.append(url_iterator.next()) | 208 urls.append(url_iterator.next()) |
| 207 except StopIteration: | 209 except StopIteration: |
| 208 break | 210 break |
| 209 return urls | 211 return urls |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 227 tab = self._navigation_tabs[i] | 229 tab = self._navigation_tabs[i] |
| 228 batch.append((tab, url)) | 230 batch.append((tab, url)) |
| 229 | 231 |
| 230 queued_tabs = self._BatchNavigateTabs(batch) | 232 queued_tabs = self._BatchNavigateTabs(batch) |
| 231 self._WaitForQueuedTabsToLoad(queued_tabs) | 233 self._WaitForQueuedTabsToLoad(queued_tabs) |
| 232 | 234 |
| 233 self.CleanUpAfterBatchNavigation() | 235 self.CleanUpAfterBatchNavigation() |
| 234 | 236 |
| 235 if self.ShouldExitAfterBatchNavigation(): | 237 if self.ShouldExitAfterBatchNavigation(): |
| 236 break | 238 break |
| OLD | NEW |