OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 import time | 4 import time |
5 | 5 |
6 from telemetry.core import browser_finder | 6 from telemetry.core import browser_finder |
7 from telemetry.core import browser_finder_exceptions | 7 from telemetry.core import browser_finder_exceptions |
8 from telemetry.core import exceptions | 8 from telemetry.core import exceptions |
9 from telemetry.core import platform | 9 from telemetry.core import platform |
10 from telemetry.core import util | 10 from telemetry.core import util |
| 11 from telemetry.core.backends.chrome_inspector import devtools_http |
11 | 12 |
12 | 13 |
13 class FastNavigationProfileExtender(object): | 14 class FastNavigationProfileExtender(object): |
14 """Extends a Chrome profile. | 15 """Extends a Chrome profile. |
15 | 16 |
16 This class creates or extends an existing profile by performing a set of tab | 17 This class creates or extends an existing profile by performing a set of tab |
17 navigations in large batches. This is accomplished by opening a large number | 18 navigations in large batches. This is accomplished by opening a large number |
18 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 19 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
19 tabs to load. This provides two benefits: | 20 tabs to load. This provides two benefits: |
20 - Takes advantage of the high number of logical cores on modern CPUs. | 21 - Takes advantage of the high number of logical cores on modern CPUs. |
(...skipping 12 matching lines...) Expand all Loading... |
33 | 34 |
34 # The path of the profile that the browser will use while it's running. | 35 # The path of the profile that the browser will use while it's running. |
35 # This member is initialized during SetUp(). | 36 # This member is initialized during SetUp(). |
36 self._profile_path = None | 37 self._profile_path = None |
37 | 38 |
38 # A reference to the browser that will be performing all of the tab | 39 # A reference to the browser that will be performing all of the tab |
39 # navigations. | 40 # navigations. |
40 # This member is initialized during SetUp(). | 41 # This member is initialized during SetUp(). |
41 self._browser = None | 42 self._browser = None |
42 | 43 |
| 44 # The instance keeps a list of Tabs that can be navigated successfully. |
| 45 # This means that the Tab is not crashed, and is processing JavaScript in a |
| 46 # timely fashion. |
| 47 self._navigation_tabs = [] |
| 48 |
43 # The number of tabs to use. | 49 # The number of tabs to use. |
44 self._NUM_TABS = maximum_batch_size | 50 self._NUM_TABS = maximum_batch_size |
45 | 51 |
46 # The amount of time to wait for a batch of pages to finish loading. | 52 # The amount of time to wait for a batch of pages to finish loading. |
47 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 53 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 |
48 | 54 |
49 # The default amount of time to wait for the retrieval of the URL of a tab. | 55 # The default amount of time to wait for the retrieval of the URL of a tab. |
50 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 56 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 |
51 | 57 |
52 def Run(self, finder_options): | 58 def Run(self, finder_options): |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 implementation. | 94 implementation. |
89 """ | 95 """ |
90 self._profile_path = finder_options.output_profile_path | 96 self._profile_path = finder_options.output_profile_path |
91 possible_browser = self._GetPossibleBrowser(finder_options) | 97 possible_browser = self._GetPossibleBrowser(finder_options) |
92 | 98 |
93 assert possible_browser.supports_tab_control | 99 assert possible_browser.supports_tab_control |
94 assert (platform.GetHostPlatform().GetOSName() in | 100 assert (platform.GetHostPlatform().GetOSName() in |
95 ["win", "mac", "linux"]) | 101 ["win", "mac", "linux"]) |
96 self._browser = possible_browser.Create(finder_options) | 102 self._browser = possible_browser.Create(finder_options) |
97 | 103 |
98 while(len(self._browser.tabs) < self._NUM_TABS): | |
99 self._browser.tabs.New() | |
100 | |
101 def TearDown(self): | 104 def TearDown(self): |
102 """Teardown that is guaranteed to be executed before the instance is | 105 """Teardown that is guaranteed to be executed before the instance is |
103 destroyed. | 106 destroyed. |
104 | 107 |
105 Can be overridden by subclasses. Subclasses must call the super class | 108 Can be overridden by subclasses. Subclasses must call the super class |
106 implementation. | 109 implementation. |
107 """ | 110 """ |
108 if self._browser: | 111 if self._browser: |
109 self._browser.Close() | 112 self._browser.Close() |
110 self._browser = None | 113 self._browser = None |
111 | 114 |
112 def CleanUpAfterBatchNavigation(self): | 115 def CleanUpAfterBatchNavigation(self): |
113 """A hook for subclasses to perform cleanup after each batch of | 116 """A hook for subclasses to perform cleanup after each batch of |
114 navigations. | 117 navigations. |
115 | 118 |
116 Can be overridden by subclasses. | 119 Can be overridden by subclasses. |
117 """ | 120 """ |
118 pass | 121 pass |
119 | 122 |
120 @property | 123 @property |
121 def profile_path(self): | 124 def profile_path(self): |
122 return self._profile_path | 125 return self._profile_path |
123 | 126 |
| 127 def _RefreshNavigationTabs(self): |
| 128 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
| 129 elements, each of which is not crashed. The crashed tabs are intentionally |
| 130 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
| 131 |
| 132 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
| 133 These tabs will be in self._browser.tabs, but not in self._navigation_tabs. |
| 134 There is no way to kill these tabs, so they are also leaked. This method is |
| 135 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
| 136 """ |
| 137 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
| 138 self._navigation_tabs = live_tabs |
| 139 |
| 140 while len(self._navigation_tabs) < self._NUM_TABS: |
| 141 self._navigation_tabs.append(self._browser.tabs.New()) |
| 142 |
| 143 def _RemoveNavigationTab(self, tab): |
| 144 """Removes a tab which is no longer in a useable state from |
| 145 self._navigation_tabs. The tab is not removed from self._browser.tabs, |
| 146 since there is no guarantee that the tab can be safely removed.""" |
| 147 self._navigation_tabs.remove(tab) |
| 148 |
124 def _GetPossibleBrowser(self, finder_options): | 149 def _GetPossibleBrowser(self, finder_options): |
125 """Return a possible_browser with the given options.""" | 150 """Return a possible_browser with the given options.""" |
126 possible_browser = browser_finder.FindBrowser(finder_options) | 151 possible_browser = browser_finder.FindBrowser(finder_options) |
127 if not possible_browser: | 152 if not possible_browser: |
128 raise browser_finder_exceptions.BrowserFinderException( | 153 raise browser_finder_exceptions.BrowserFinderException( |
129 'No browser found.\n\nAvailable browsers:\n%s\n' % | 154 'No browser found.\n\nAvailable browsers:\n%s\n' % |
130 '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options))) | 155 '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options))) |
131 finder_options.browser_options.browser_type = ( | 156 finder_options.browser_options.browser_type = ( |
132 possible_browser.browser_type) | 157 possible_browser.browser_type) |
133 | 158 |
134 return possible_browser | 159 return possible_browser |
135 | 160 |
136 def _RetrieveTabUrl(self, tab, timeout): | 161 def _RetrieveTabUrl(self, tab, timeout): |
137 """Retrieves the URL of the tab.""" | 162 """Retrieves the URL of the tab.""" |
138 try: | 163 try: |
139 return tab.EvaluateJavaScript('document.URL', timeout) | 164 return tab.EvaluateJavaScript('document.URL', timeout) |
140 except exceptions.DevtoolsTargetCrashException: | 165 except (exceptions.DevtoolsTargetCrashException, |
| 166 devtools_http.DevToolsClientConnectionError, |
| 167 devtools_http.DevToolsClientUrlError): |
141 return None | 168 return None |
142 | 169 |
143 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 170 def _WaitForUrlToChange(self, tab, initial_url, timeout): |
144 """Waits for the tab to navigate away from its initial url.""" | 171 """Waits for the tab to navigate away from its initial url.""" |
145 end_time = time.time() + timeout | 172 end_time = time.time() + timeout |
146 while True: | 173 while True: |
147 seconds_to_wait = end_time - time.time() | 174 seconds_to_wait = end_time - time.time() |
148 seconds_to_wait = max(0, seconds_to_wait) | 175 seconds_to_wait = max(0, seconds_to_wait) |
149 | 176 |
150 if seconds_to_wait == 0: | 177 if seconds_to_wait == 0: |
(...skipping 20 matching lines...) Expand all Loading... |
171 """ | 198 """ |
172 timeout_in_seconds = 0 | 199 timeout_in_seconds = 0 |
173 | 200 |
174 queued_tabs = [] | 201 queued_tabs = [] |
175 for tab, url in batch: | 202 for tab, url in batch: |
176 initial_url = self._RetrieveTabUrl(tab, | 203 initial_url = self._RetrieveTabUrl(tab, |
177 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | 204 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) |
178 | 205 |
179 try: | 206 try: |
180 tab.Navigate(url, None, timeout_in_seconds) | 207 tab.Navigate(url, None, timeout_in_seconds) |
181 except exceptions.DevtoolsTargetCrashException: | 208 except (exceptions.DevtoolsTargetCrashException, |
182 # We expect a time out, and don't mind if the webpage crashes. Ignore | 209 devtools_http.DevToolsClientConnectionError, |
183 # both exceptions. | 210 devtools_http.DevToolsClientUrlError): |
| 211 # We expect a time out. It's possible for other problems to arise, but |
| 212 # this method is not responsible for dealing with them. Ignore all |
| 213 # exceptions. |
184 pass | 214 pass |
185 | 215 |
186 queued_tabs.append((tab, initial_url)) | 216 queued_tabs.append((tab, initial_url)) |
187 return queued_tabs | 217 return queued_tabs |
188 | 218 |
189 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 219 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
190 """Waits for all the batch navigated tabs to finish loading. | 220 """Waits for all the batch navigated tabs to finish loading. |
191 | 221 |
192 Args: | 222 Args: |
193 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 223 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
194 to have already been sent a navigation command. | 224 to have already been sent a navigation command. |
195 """ | 225 """ |
196 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 226 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS |
197 for tab, initial_url in queued_tabs: | 227 for tab, initial_url in queued_tabs: |
198 seconds_to_wait = end_time - time.time() | 228 seconds_to_wait = end_time - time.time() |
199 seconds_to_wait = max(0, seconds_to_wait) | 229 seconds_to_wait = max(0, seconds_to_wait) |
200 | 230 |
201 if seconds_to_wait == 0: | 231 if seconds_to_wait == 0: |
202 break | 232 break |
203 | 233 |
204 # Since we don't wait any time for the tab url navigation to commit, it's | 234 # Since we don't wait any time for the tab url navigation to commit, it's |
205 # possible that the tab hasn't started navigating yet. | 235 # possible that the tab hasn't started navigating yet. |
206 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | 236 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) |
207 | 237 |
208 seconds_to_wait = end_time - time.time() | 238 seconds_to_wait = end_time - time.time() |
209 seconds_to_wait = max(0, seconds_to_wait) | 239 seconds_to_wait = max(0, seconds_to_wait) |
210 | 240 |
211 try: | 241 try: |
212 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | 242 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) |
213 except (util.TimeoutException, exceptions.DevtoolsTargetCrashException): | 243 except util.TimeoutException: |
214 # Ignore time outs and web page crashes. | 244 # Ignore time outs. |
215 pass | 245 pass |
| 246 except (exceptions.DevtoolsTargetCrashException, |
| 247 devtools_http.DevToolsClientConnectionError, |
| 248 devtools_http.DevToolsClientUrlError): |
| 249 # If any error occurs, remove the tab. It's probably in an |
| 250 # unrecoverable state. |
| 251 self._RemoveNavigationTab(tab) |
216 | 252 |
217 def _GetUrlsToNavigate(self, url_iterator): | 253 def _GetUrlsToNavigate(self, url_iterator): |
218 """Returns an array of urls to navigate to, given a url_iterator.""" | 254 """Returns an array of urls to navigate to, given a url_iterator.""" |
219 urls = [] | 255 urls = [] |
220 for _ in xrange(self._NUM_TABS): | 256 for _ in xrange(self._NUM_TABS): |
221 try: | 257 try: |
222 urls.append(url_iterator.next()) | 258 urls.append(url_iterator.next()) |
223 except StopIteration: | 259 except StopIteration: |
224 break | 260 break |
225 return urls | 261 return urls |
226 | 262 |
227 def _PerformNavigations(self): | 263 def _PerformNavigations(self): |
228 """Repeatedly fetches a batch of urls, and navigates to those urls. This | 264 """Repeatedly fetches a batch of urls, and navigates to those urls. This |
229 will run until an empty batch is returned, or | 265 will run until an empty batch is returned, or |
230 ShouldExitAfterBatchNavigation() returns True. | 266 ShouldExitAfterBatchNavigation() returns True. |
231 """ | 267 """ |
232 url_iterator = self.GetUrlIterator() | 268 url_iterator = self.GetUrlIterator() |
233 while True: | 269 while True: |
| 270 self._RefreshNavigationTabs() |
234 urls = self._GetUrlsToNavigate(url_iterator) | 271 urls = self._GetUrlsToNavigate(url_iterator) |
235 | 272 |
236 if len(urls) == 0: | 273 if len(urls) == 0: |
237 break | 274 break |
238 | 275 |
239 batch = [] | 276 batch = [] |
240 for i in range(len(urls)): | 277 for i in range(len(urls)): |
241 url = urls[i] | 278 url = urls[i] |
242 tab = self._browser.tabs[i] | 279 tab = self._navigation_tabs[i] |
243 batch.append((tab, url)) | 280 batch.append((tab, url)) |
244 | 281 |
245 queued_tabs = self._BatchNavigateTabs(batch) | 282 queued_tabs = self._BatchNavigateTabs(batch) |
246 self._WaitForQueuedTabsToLoad(queued_tabs) | 283 self._WaitForQueuedTabsToLoad(queued_tabs) |
247 | 284 |
248 self.CleanUpAfterBatchNavigation() | 285 self.CleanUpAfterBatchNavigation() |
249 | 286 |
250 if self.ShouldExitAfterBatchNavigation(): | 287 if self.ShouldExitAfterBatchNavigation(): |
251 break | 288 break |
OLD | NEW |