OLD | NEW |
---|---|
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 import time | 4 import time |
5 | 5 |
6 from profile_creators import profile_extender | 6 from profile_creators import profile_extender |
7 from telemetry.core import exceptions | 7 from telemetry.core import exceptions |
8 from telemetry.core import util | |
8 | 9 |
9 | 10 |
10 class FastNavigationProfileExtender(profile_extender.ProfileExtender): | 11 class FastNavigationProfileExtender(profile_extender.ProfileExtender): |
11 """Extends a Chrome profile. | 12 """Extends a Chrome profile. |
12 | 13 |
13 This class creates or extends an existing profile by performing a set of tab | 14 This class creates or extends an existing profile by performing a set of tab |
14 navigations in large batches. This is accomplished by opening a large number | 15 navigations in large batches. This is accomplished by opening a large number |
15 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 16 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
16 tabs to load. This provides two benefits: | 17 tabs to load. This provides two benefits: |
17 - Takes advantage of the high number of logical cores on modern CPUs. | 18 - Takes advantage of the high number of logical cores on modern CPUs. |
(...skipping 11 matching lines...) Expand all Loading... | |
29 super(FastNavigationProfileExtender, self).__init__(finder_options) | 30 super(FastNavigationProfileExtender, self).__init__(finder_options) |
30 | 31 |
31 # The instance keeps a list of Tabs that can be navigated successfully. | 32 # The instance keeps a list of Tabs that can be navigated successfully. |
32 # This means that the Tab is not crashed, and is processing JavaScript in a | 33 # This means that the Tab is not crashed, and is processing JavaScript in a |
33 # timely fashion. | 34 # timely fashion. |
34 self._navigation_tabs = [] | 35 self._navigation_tabs = [] |
35 | 36 |
36 # The number of tabs to use. | 37 # The number of tabs to use. |
37 self._NUM_TABS = maximum_batch_size | 38 self._NUM_TABS = maximum_batch_size |
38 | 39 |
39 # The amount of time to wait for a batch of pages to finish loading. | 40 # The amount of additional time to wait for a batch of pages to finish |
40 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 41 # loading for each page in the batch. |
42 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20 | |
41 | 43 |
42 # The default amount of time to wait for the retrieval of the URL of a tab. | 44 # The amount of time to wait for a page to quiesce. Some pages will never |
43 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 45 # quiesce. |
46 self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10 | |
44 | 47 |
45 def Run(self): | 48 def Run(self): |
46 """Superclass override.""" | 49 """Superclass override.""" |
47 try: | 50 try: |
48 self.SetUpBrowser() | 51 self.SetUpBrowser() |
49 self._PerformNavigations() | 52 self._PerformNavigations() |
50 finally: | 53 finally: |
51 self.TearDownBrowser() | 54 self.TearDownBrowser() |
52 | 55 |
56 # When there hasn't been an exception, verify that the profile was | |
57 # correctly extended. | |
58 # TODO(erikchen): I've intentionally omitted my implementation of | |
59 # VerifyProfileWasExtended() in small_profile_extender, since the profile | |
60 # is not being correctly extended. http://crbug.com/484833 | |
61 # http://crbug.com/484880 | |
62 self.VerifyProfileWasExtended() | |
63 | |
53 def GetUrlIterator(self): | 64 def GetUrlIterator(self): |
54 """Gets URLs for the browser to navigate to. | 65 """Gets URLs for the browser to navigate to. |
55 | 66 |
56 Intended for subclass override. | 67 Intended for subclass override. |
57 | 68 |
58 Returns: | 69 Returns: |
59 An iterator whose elements are urls to be navigated to. | 70 An iterator whose elements are urls to be navigated to. |
60 """ | 71 """ |
61 raise NotImplementedError() | 72 raise NotImplementedError() |
62 | 73 |
63 def ShouldExitAfterBatchNavigation(self): | 74 def ShouldExitAfterBatchNavigation(self): |
64 """Returns a boolean indicating whether profile extension is finished. | 75 """Returns a boolean indicating whether profile extension is finished. |
65 | 76 |
66 Intended for subclass override. | 77 Intended for subclass override. |
67 """ | 78 """ |
68 raise NotImplementedError() | 79 raise NotImplementedError() |
69 | 80 |
70 def CleanUpAfterBatchNavigation(self): | 81 def CleanUpAfterBatchNavigation(self): |
71 """A hook for subclasses to perform cleanup after each batch of | 82 """A hook for subclasses to perform cleanup after each batch of |
72 navigations. | 83 navigations. |
73 | 84 |
74 Can be overridden by subclasses. | 85 Can be overridden by subclasses. |
75 """ | 86 """ |
76 pass | 87 pass |
77 | 88 |
78 def _AddNewTab(self): | 89 def _AddNewTab(self): |
nednguyen
2015/05/12 16:07:43
The content of this method can just be moved to _
erikchen
2015/05/12 18:43:48
Done.
| |
79 """Adds a new tab to the browser.""" | 90 """Adds a new tab to the browser.""" |
80 | 91 self._navigation_tabs.append(self._browser.tabs.New()) |
81 # Adding a new tab requires making a request over devtools. This can fail | |
82 # for a variety of reasons. Retry 3 times. | |
83 retry_count = 3 | |
84 for i in range(retry_count): | |
85 try: | |
86 self._navigation_tabs.append(self._browser.tabs.New()) | |
87 except exceptions.Error: | |
88 if i == retry_count - 1: | |
89 raise | |
90 else: | |
91 break | |
92 | 92 |
93 def _RefreshNavigationTabs(self): | 93 def _RefreshNavigationTabs(self): |
94 """Updates the member self._navigation_tabs to contain self._NUM_TABS | 94 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
95 elements, each of which is not crashed. The crashed tabs are intentionally | 95 elements, each of which is not crashed. The crashed tabs are intentionally |
96 leaked, since Telemetry doesn't have a good way of killing crashed tabs. | 96 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
97 | 97 |
98 It is also possible for a tab to be stalled in an infinite JavaScript loop. | 98 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
99 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. | 99 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. |
100 There is no way to kill these tabs, so they are also leaked. This method is | 100 There is no way to kill these tabs, so they are also leaked. This method is |
101 careful to only use tabs in self._navigation_tabs, or newly created tabs. | 101 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
102 """ | 102 """ |
103 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] | 103 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
104 self._navigation_tabs = live_tabs | 104 self._navigation_tabs = live_tabs |
105 | 105 |
106 while len(self._navigation_tabs) < self._NUM_TABS: | 106 while len(self._navigation_tabs) < self._NUM_TABS: |
107 self._AddNewTab() | 107 self._AddNewTab() |
108 | 108 |
109 def _RemoveNavigationTab(self, tab): | 109 def _RemoveNavigationTab(self, tab): |
110 """Removes a tab which is no longer in a useable state from | 110 """Removes a tab which is no longer in a useable state from |
111 self._navigation_tabs. The tab is not removed from self.browser.tabs, | 111 self._navigation_tabs. The tab is not removed from self.browser.tabs, |
112 since there is no guarantee that the tab can be safely removed.""" | 112 since there is no guarantee that the tab can be safely removed.""" |
113 self._navigation_tabs.remove(tab) | 113 self._navigation_tabs.remove(tab) |
114 | 114 |
115 def _RetrieveTabUrl(self, tab, timeout): | 115 def _RetrieveTabUrl(self, tab, timeout): |
116 """Retrieves the URL of the tab.""" | 116 """Retrieves the URL of the tab.""" |
117 try: | 117 # TODO(erikchen): Use tab.url instead, which talks to the browser process |
118 return tab.EvaluateJavaScript('document.URL', timeout) | 118 # instead of the renderer process. http://crbug.com/486119 |
119 except exceptions.Error: | 119 return tab.EvaluateJavaScript('document.URL', timeout) |
120 return None | |
121 | 120 |
122 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 121 def _WaitForUrlToChange(self, tab, initial_url, end_time): |
123 """Waits for the tab to navigate away from its initial url.""" | 122 """Waits for the tab to navigate away from its initial url. |
124 end_time = time.time() + timeout | 123 |
124 If time.time() is larger than end_time, the function does nothing. | |
125 Otherwise, the function tries to return no later than end_time. | |
126 """ | |
125 while True: | 127 while True: |
126 seconds_to_wait = end_time - time.time() | 128 seconds_to_wait = end_time - time.time() |
127 seconds_to_wait = max(0, seconds_to_wait) | 129 if seconds_to_wait <= 0: |
nednguyen
2015/05/12 16:07:43
what about:
if time.time() >= end_time:
break
erikchen
2015/05/12 18:43:48
Each iteration of the loop should only call time.t
nednguyen
2015/05/12 19:37:57
ah, I didn't show the seconds_to_wait below.
| |
128 | |
129 if seconds_to_wait == 0: | |
130 break | 130 break |
131 | 131 |
132 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) | 132 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) |
133 if current_url != initial_url: | 133 if current_url != initial_url and current_url != "": |
134 break | 134 break |
135 | 135 |
136 # Retrieving the current url is a non-trivial operation. Add a small | 136 # Retrieving the current url is a non-trivial operation. Add a small |
137 # sleep here to prevent this method from contending with the actual | 137 # sleep here to prevent this method from contending with the actual |
138 # navigation. | 138 # navigation. |
139 time.sleep(0.01) | 139 time.sleep(0.01) |
140 | 140 |
141 def _WaitForTabToBeReady(self, tab, end_time): | |
nednguyen
2015/05/12 16:07:43
Why do we need this? Ain't HasReachedQuiescence c
erikchen
2015/05/12 18:43:48
Through experimental testing, it isn't sufficient.
nednguyen
2015/05/12 19:37:57
Ok, this makes sense. I think we should also move
erikchen
2015/05/12 19:44:38
Done.
| |
142 """Waits for the tab to be ready. | |
143 | |
144 If time.time() is larger than end_time, the function does nothing. | |
145 Otherwise, the function tries to return no later than end_time. | |
146 """ | |
147 seconds_to_wait = end_time - time.time() | |
148 if seconds_to_wait <= 0: | |
149 return | |
150 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | |
151 | |
141 def _BatchNavigateTabs(self, batch): | 152 def _BatchNavigateTabs(self, batch): |
142 """Performs a batch of tab navigations with minimal delay. | 153 """Performs a batch of tab navigations with minimal delay. |
143 | 154 |
144 Args: | 155 Args: |
145 batch: A list of tuples (tab, url). | 156 batch: A list of tuples (tab, url). |
146 | 157 |
147 Returns: | 158 Returns: |
148 A list of tuples (tab, initial_url). |initial_url| is the url of the | 159 A list of tuples (tab, initial_url). |initial_url| is the url of the |
149 |tab| prior to a navigation command being sent to it. | 160 |tab| prior to a navigation command being sent to it. |
150 """ | 161 """ |
151 timeout_in_seconds = 0 | 162 # Attempting to pass in a timeout of 0 seconds results in a synchronous |
163 # socket error from the websocket library. Pass in a very small timeout | |
164 # instead so that the websocket library raises a Timeout exception. This | |
165 # prevents the logic from accidentally catching different socket | |
166 # exceptions. | |
167 timeout_in_seconds = 0.01 | |
152 | 168 |
153 queued_tabs = [] | 169 queued_tabs = [] |
154 for tab, url in batch: | 170 for tab, url in batch: |
155 initial_url = self._RetrieveTabUrl(tab, | 171 initial_url = self._RetrieveTabUrl(tab, 20) |
156 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | |
157 | |
158 try: | 172 try: |
159 tab.Navigate(url, None, timeout_in_seconds) | 173 tab.Navigate(url, None, timeout_in_seconds) |
160 except exceptions.Error: | 174 except exceptions.TimeoutException: |
161 # We expect a time out. It's possible for other problems to arise, but | 175 # We expect to receive a timeout exception, since we're not waiting for |
162 # this method is not responsible for dealing with them. Ignore all | 176 # the navigation to complete. |
163 # exceptions. | |
164 pass | 177 pass |
165 | |
166 queued_tabs.append((tab, initial_url)) | 178 queued_tabs.append((tab, initial_url)) |
167 return queued_tabs | 179 return queued_tabs |
168 | 180 |
169 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 181 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
170 """Waits for all the batch navigated tabs to finish loading. | 182 """Waits for all the batch navigated tabs to finish loading. |
171 | 183 |
172 Args: | 184 Args: |
173 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 185 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
174 to have already been sent a navigation command. | 186 to have already been sent a navigation command. |
175 """ | 187 """ |
176 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 188 total_batch_timeout = (len(queued_tabs) * |
189 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS) | |
190 end_time = time.time() + total_batch_timeout | |
177 for tab, initial_url in queued_tabs: | 191 for tab, initial_url in queued_tabs: |
192 # Since we didn't wait any time for the tab url navigation to commit, it's | |
193 # possible that the tab hasn't started navigating yet. | |
194 self._WaitForUrlToChange(tab, initial_url, end_time) | |
195 self._WaitForTabToBeReady(tab, end_time) | |
196 | |
197 # Wait up to 10 seconds for the page to quiesce. If the page hasn't | |
198 # quiesced in 10 seconds, it will probably never quiesce. | |
178 seconds_to_wait = end_time - time.time() | 199 seconds_to_wait = end_time - time.time() |
179 seconds_to_wait = max(0, seconds_to_wait) | 200 seconds_to_wait = max(0, seconds_to_wait) |
180 | |
181 if seconds_to_wait == 0: | |
182 break | |
183 | |
184 # Since we don't wait any time for the tab url navigation to commit, it's | |
185 # possible that the tab hasn't started navigating yet. | |
186 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | |
187 | |
188 seconds_to_wait = end_time - time.time() | |
189 seconds_to_wait = max(0, seconds_to_wait) | |
190 | |
191 try: | 201 try: |
192 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | 202 util.WaitFor(tab.HasReachedQuiescence, seconds_to_wait) |
193 except exceptions.TimeoutException: | 203 except exceptions.TimeoutException: |
194 # Ignore time outs. | |
195 pass | 204 pass |
196 except exceptions.Error: | |
197 # If any error occurs, remove the tab. it's probably in an | |
198 # unrecoverable state. | |
199 self._RemoveNavigationTab(tab) | |
200 | 205 |
201 def _GetUrlsToNavigate(self, url_iterator): | 206 def _GetUrlsToNavigate(self, url_iterator): |
202 """Returns an array of urls to navigate to, given a url_iterator.""" | 207 """Returns an array of urls to navigate to, given a url_iterator.""" |
203 urls = [] | 208 urls = [] |
204 for _ in xrange(self._NUM_TABS): | 209 for _ in xrange(self._NUM_TABS): |
205 try: | 210 try: |
206 urls.append(url_iterator.next()) | 211 urls.append(url_iterator.next()) |
207 except StopIteration: | 212 except StopIteration: |
208 break | 213 break |
209 return urls | 214 return urls |
(...skipping 17 matching lines...) Expand all Loading... | |
227 tab = self._navigation_tabs[i] | 232 tab = self._navigation_tabs[i] |
228 batch.append((tab, url)) | 233 batch.append((tab, url)) |
229 | 234 |
230 queued_tabs = self._BatchNavigateTabs(batch) | 235 queued_tabs = self._BatchNavigateTabs(batch) |
231 self._WaitForQueuedTabsToLoad(queued_tabs) | 236 self._WaitForQueuedTabsToLoad(queued_tabs) |
232 | 237 |
233 self.CleanUpAfterBatchNavigation() | 238 self.CleanUpAfterBatchNavigation() |
234 | 239 |
235 if self.ShouldExitAfterBatchNavigation(): | 240 if self.ShouldExitAfterBatchNavigation(): |
236 break | 241 break |
OLD | NEW |