OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 import time | 4 import time |
5 | 5 |
6 from profile_creators import profile_extender | 6 from profile_creators import profile_extender |
7 from telemetry.core import exceptions | 7 from telemetry.core import exceptions |
| 8 from telemetry.core import util |
8 | 9 |
9 | 10 |
10 class FastNavigationProfileExtender(profile_extender.ProfileExtender): | 11 class FastNavigationProfileExtender(profile_extender.ProfileExtender): |
11 """Extends a Chrome profile. | 12 """Extends a Chrome profile. |
12 | 13 |
13 This class creates or extends an existing profile by performing a set of tab | 14 This class creates or extends an existing profile by performing a set of tab |
14 navigations in large batches. This is accomplished by opening a large number | 15 navigations in large batches. This is accomplished by opening a large number |
15 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 16 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
16 tabs to load. This provides two benefits: | 17 tabs to load. This provides two benefits: |
17 - Takes advantage of the high number of logical cores on modern CPUs. | 18 - Takes advantage of the high number of logical cores on modern CPUs. |
(...skipping 11 matching lines...) Expand all Loading... |
29 super(FastNavigationProfileExtender, self).__init__(finder_options) | 30 super(FastNavigationProfileExtender, self).__init__(finder_options) |
30 | 31 |
31 # The instance keeps a list of Tabs that can be navigated successfully. | 32 # The instance keeps a list of Tabs that can be navigated successfully. |
32 # This means that the Tab is not crashed, and is processing JavaScript in a | 33 # This means that the Tab is not crashed, and is processing JavaScript in a |
33 # timely fashion. | 34 # timely fashion. |
34 self._navigation_tabs = [] | 35 self._navigation_tabs = [] |
35 | 36 |
36 # The number of tabs to use. | 37 # The number of tabs to use. |
37 self._NUM_TABS = maximum_batch_size | 38 self._NUM_TABS = maximum_batch_size |
38 | 39 |
39 # The amount of time to wait for a batch of pages to finish loading. | 40 # The amount of additional time to wait for a batch of pages to finish |
40 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 41 # loading for each page in the batch. |
| 42 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20 |
41 | 43 |
42 # The default amount of time to wait for the retrieval of the URL of a tab. | 44 # The amount of time to wait for a page to quiesce. Some pages will never |
43 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 45 # quiesce. |
| 46 self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10 |
44 | 47 |
45 def Run(self): | 48 def Run(self): |
46 """Superclass override.""" | 49 """Superclass override.""" |
47 try: | 50 try: |
48 self.SetUpBrowser() | 51 self.SetUpBrowser() |
49 self._PerformNavigations() | 52 self._PerformNavigations() |
50 finally: | 53 finally: |
51 self.TearDownBrowser() | 54 self.TearDownBrowser() |
52 | 55 |
| 56 # When there hasn't been an exception, verify that the profile was |
| 57 # correctly extended. |
| 58 # TODO(erikchen): I've intentionally omitted my implementation of |
| 59 # VerifyProfileWasExtended() in small_profile_extender, since the profile |
| 60 # is not being correctly extended. http://crbug.com/484833 |
| 61 # http://crbug.com/484880 |
| 62 self.VerifyProfileWasExtended() |
| 63 |
| 64 def VerifyProfileWasExtended(self): |
| 65 """Verifies that the profile was correctly extended. |
| 66 |
| 67 Can be overridden by subclasses. |
| 68 """ |
| 69 pass |
| 70 |
53 def GetUrlIterator(self): | 71 def GetUrlIterator(self): |
54 """Gets URLs for the browser to navigate to. | 72 """Gets URLs for the browser to navigate to. |
55 | 73 |
56 Intended for subclass override. | 74 Intended for subclass override. |
57 | 75 |
58 Returns: | 76 Returns: |
59 An iterator whose elements are urls to be navigated to. | 77 An iterator whose elements are urls to be navigated to. |
60 """ | 78 """ |
61 raise NotImplementedError() | 79 raise NotImplementedError() |
62 | 80 |
63 def ShouldExitAfterBatchNavigation(self): | 81 def ShouldExitAfterBatchNavigation(self): |
64 """Returns a boolean indicating whether profile extension is finished. | 82 """Returns a boolean indicating whether profile extension is finished. |
65 | 83 |
66 Intended for subclass override. | 84 Intended for subclass override. |
67 """ | 85 """ |
68 raise NotImplementedError() | 86 raise NotImplementedError() |
69 | 87 |
70 def CleanUpAfterBatchNavigation(self): | 88 def CleanUpAfterBatchNavigation(self): |
71 """A hook for subclasses to perform cleanup after each batch of | 89 """A hook for subclasses to perform cleanup after each batch of |
72 navigations. | 90 navigations. |
73 | 91 |
74 Can be overridden by subclasses. | 92 Can be overridden by subclasses. |
75 """ | 93 """ |
76 pass | 94 pass |
77 | 95 |
78 def _AddNewTab(self): | |
79 """Adds a new tab to the browser.""" | |
80 | |
81 # Adding a new tab requires making a request over devtools. This can fail | |
82 # for a variety of reasons. Retry 3 times. | |
83 retry_count = 3 | |
84 for i in range(retry_count): | |
85 try: | |
86 self._navigation_tabs.append(self._browser.tabs.New()) | |
87 except exceptions.Error: | |
88 if i == retry_count - 1: | |
89 raise | |
90 else: | |
91 break | |
92 | |
93 def _RefreshNavigationTabs(self): | 96 def _RefreshNavigationTabs(self): |
94 """Updates the member self._navigation_tabs to contain self._NUM_TABS | 97 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
95 elements, each of which is not crashed. The crashed tabs are intentionally | 98 elements, each of which is not crashed. The crashed tabs are intentionally |
96 leaked, since Telemetry doesn't have a good way of killing crashed tabs. | 99 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
97 | 100 |
98 It is also possible for a tab to be stalled in an infinite JavaScript loop. | 101 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
99 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. | 102 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. |
100 There is no way to kill these tabs, so they are also leaked. This method is | 103 There is no way to kill these tabs, so they are also leaked. This method is |
101 careful to only use tabs in self._navigation_tabs, or newly created tabs. | 104 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
102 """ | 105 """ |
103 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] | 106 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
104 self._navigation_tabs = live_tabs | 107 self._navigation_tabs = live_tabs |
105 | 108 |
106 while len(self._navigation_tabs) < self._NUM_TABS: | 109 while len(self._navigation_tabs) < self._NUM_TABS: |
107 self._AddNewTab() | 110 self._navigation_tabs.append(self._browser.tabs.New()) |
108 | 111 |
109 def _RemoveNavigationTab(self, tab): | 112 def _RemoveNavigationTab(self, tab): |
110 """Removes a tab which is no longer in a useable state from | 113 """Removes a tab which is no longer in a useable state from |
111 self._navigation_tabs. The tab is not removed from self.browser.tabs, | 114 self._navigation_tabs. The tab is not removed from self.browser.tabs, |
112 since there is no guarantee that the tab can be safely removed.""" | 115 since there is no guarantee that the tab can be safely removed.""" |
113 self._navigation_tabs.remove(tab) | 116 self._navigation_tabs.remove(tab) |
114 | 117 |
115 def _RetrieveTabUrl(self, tab, timeout): | 118 def _RetrieveTabUrl(self, tab, timeout): |
116 """Retrives the URL of the tab.""" | 119 """Retrives the URL of the tab.""" |
117 try: | 120 # TODO(erikchen): Use tab.url instead, which talks to the browser process |
118 return tab.EvaluateJavaScript('document.URL', timeout) | 121 # instead of the renderer process. http://crbug.com/486119 |
119 except exceptions.Error: | 122 return tab.EvaluateJavaScript('document.URL', timeout) |
120 return None | |
121 | 123 |
122 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 124 def _WaitForUrlToChange(self, tab, initial_url, end_time): |
123 """Waits for the tab to navigate away from its initial url.""" | 125 """Waits for the tab to navigate away from its initial url. |
124 end_time = time.time() + timeout | 126 |
| 127 If time.time() is larger than end_time, the function does nothing. |
| 128 Otherwise, the function tries to return no later than end_time. |
| 129 """ |
125 while True: | 130 while True: |
126 seconds_to_wait = end_time - time.time() | 131 seconds_to_wait = end_time - time.time() |
127 seconds_to_wait = max(0, seconds_to_wait) | 132 if seconds_to_wait <= 0: |
128 | |
129 if seconds_to_wait == 0: | |
130 break | 133 break |
131 | 134 |
132 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) | 135 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) |
133 if current_url != initial_url: | 136 if current_url != initial_url and current_url != "": |
134 break | 137 break |
135 | 138 |
136 # Retrieving the current url is a non-trivial operation. Add a small | 139 # Retrieving the current url is a non-trivial operation. Add a small |
137 # sleep here to prevent this method from contending with the actual | 140 # sleep here to prevent this method from contending with the actual |
138 # navigation. | 141 # navigation. |
139 time.sleep(0.01) | 142 time.sleep(0.01) |
140 | 143 |
| 144 def _WaitForTabToBeReady(self, tab, end_time): |
| 145 """Waits for the tab to be ready. |
| 146 |
| 147 If time.time() is larger than end_time, the function does nothing. |
| 148 Otherwise, the function tries to return no later than end_time. |
| 149 """ |
| 150 seconds_to_wait = end_time - time.time() |
| 151 if seconds_to_wait <= 0: |
| 152 return |
| 153 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) |
| 154 |
| 155 # Wait up to 10 seconds for the page to quiesce. If the page hasn't |
| 156 # quiesced in 10 seconds, it will probably never quiesce. |
| 157 seconds_to_wait = end_time - time.time() |
| 158 seconds_to_wait = max(0, seconds_to_wait) |
| 159 try: |
| 160 util.WaitFor(tab.HasReachedQuiescence, seconds_to_wait) |
| 161 except exceptions.TimeoutException: |
| 162 pass |
| 163 |
141 def _BatchNavigateTabs(self, batch): | 164 def _BatchNavigateTabs(self, batch): |
142 """Performs a batch of tab navigations with minimal delay. | 165 """Performs a batch of tab navigations with minimal delay. |
143 | 166 |
144 Args: | 167 Args: |
145 batch: A list of tuples (tab, url). | 168 batch: A list of tuples (tab, url). |
146 | 169 |
147 Returns: | 170 Returns: |
148 A list of tuples (tab, initial_url). |initial_url| is the url of the | 171 A list of tuples (tab, initial_url). |initial_url| is the url of the |
149 |tab| prior to a navigation command being sent to it. | 172 |tab| prior to a navigation command being sent to it. |
150 """ | 173 """ |
151 timeout_in_seconds = 0 | 174 # Attempting to pass in a timeout of 0 seconds results in a synchronous |
| 175 # socket error from the websocket library. Pass in a very small timeout |
| 176 # instead so that the websocket library raises a Timeout exception. This |
| 177 # prevents the logic from accidentally catching different socket |
| 178 # exceptions. |
| 179 timeout_in_seconds = 0.01 |
152 | 180 |
153 queued_tabs = [] | 181 queued_tabs = [] |
154 for tab, url in batch: | 182 for tab, url in batch: |
155 initial_url = self._RetrieveTabUrl(tab, | 183 initial_url = self._RetrieveTabUrl(tab, 20) |
156 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | |
157 | |
158 try: | 184 try: |
159 tab.Navigate(url, None, timeout_in_seconds) | 185 tab.Navigate(url, None, timeout_in_seconds) |
160 except exceptions.Error: | 186 except exceptions.TimeoutException: |
161 # We expect a time out. It's possible for other problems to arise, but | 187 # We expect to receive a timeout exception, since we're not waiting for |
162 # this method is not responsible for dealing with them. Ignore all | 188 # the navigation to complete. |
163 # exceptions. | |
164 pass | 189 pass |
165 | |
166 queued_tabs.append((tab, initial_url)) | 190 queued_tabs.append((tab, initial_url)) |
167 return queued_tabs | 191 return queued_tabs |
168 | 192 |
169 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 193 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
170 """Waits for all the batch navigated tabs to finish loading. | 194 """Waits for all the batch navigated tabs to finish loading. |
171 | 195 |
172 Args: | 196 Args: |
173 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 197 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
174 to have already been sent a navigation command. | 198 to have already been sent a navigation command. |
175 """ | 199 """ |
176 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 200 total_batch_timeout = (len(queued_tabs) * |
| 201 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS) |
| 202 end_time = time.time() + total_batch_timeout |
177 for tab, initial_url in queued_tabs: | 203 for tab, initial_url in queued_tabs: |
178 seconds_to_wait = end_time - time.time() | 204 # Since we didn't wait any time for the tab url navigation to commit, it's |
179 seconds_to_wait = max(0, seconds_to_wait) | |
180 | |
181 if seconds_to_wait == 0: | |
182 break | |
183 | |
184 # Since we don't wait any time for the tab url navigation to commit, it's | |
185 # possible that the tab hasn't started navigating yet. | 205 # possible that the tab hasn't started navigating yet. |
186 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | 206 self._WaitForUrlToChange(tab, initial_url, end_time) |
187 | 207 self._WaitForTabToBeReady(tab, end_time) |
188 seconds_to_wait = end_time - time.time() | |
189 seconds_to_wait = max(0, seconds_to_wait) | |
190 | |
191 try: | |
192 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | |
193 except exceptions.TimeoutException: | |
194 # Ignore time outs. | |
195 pass | |
196 except exceptions.Error: | |
197 # If any error occurs, remove the tab. it's probably in an | |
198 # unrecoverable state. | |
199 self._RemoveNavigationTab(tab) | |
200 | 208 |
201 def _GetUrlsToNavigate(self, url_iterator): | 209 def _GetUrlsToNavigate(self, url_iterator): |
202 """Returns an array of urls to navigate to, given a url_iterator.""" | 210 """Returns an array of urls to navigate to, given a url_iterator.""" |
203 urls = [] | 211 urls = [] |
204 for _ in xrange(self._NUM_TABS): | 212 for _ in xrange(self._NUM_TABS): |
205 try: | 213 try: |
206 urls.append(url_iterator.next()) | 214 urls.append(url_iterator.next()) |
207 except StopIteration: | 215 except StopIteration: |
208 break | 216 break |
209 return urls | 217 return urls |
(...skipping 17 matching lines...) Expand all Loading... |
227 tab = self._navigation_tabs[i] | 235 tab = self._navigation_tabs[i] |
228 batch.append((tab, url)) | 236 batch.append((tab, url)) |
229 | 237 |
230 queued_tabs = self._BatchNavigateTabs(batch) | 238 queued_tabs = self._BatchNavigateTabs(batch) |
231 self._WaitForQueuedTabsToLoad(queued_tabs) | 239 self._WaitForQueuedTabsToLoad(queued_tabs) |
232 | 240 |
233 self.CleanUpAfterBatchNavigation() | 241 self.CleanUpAfterBatchNavigation() |
234 | 242 |
235 if self.ShouldExitAfterBatchNavigation(): | 243 if self.ShouldExitAfterBatchNavigation(): |
236 break | 244 break |
OLD | NEW |