OLD | NEW |
---|---|
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 import time | 4 import time |
5 | 5 |
6 from profile_creators import profile_extender | 6 from profile_creators import profile_extender |
7 from telemetry.core import exceptions | 7 from telemetry.core import exceptions |
8 from telemetry.core import util | |
8 | 9 |
9 | 10 |
10 class FastNavigationProfileExtender(profile_extender.ProfileExtender): | 11 class FastNavigationProfileExtender(profile_extender.ProfileExtender): |
11 """Extends a Chrome profile. | 12 """Extends a Chrome profile. |
12 | 13 |
13 This class creates or extends an existing profile by performing a set of tab | 14 This class creates or extends an existing profile by performing a set of tab |
14 navigations in large batches. This is accomplished by opening a large number | 15 navigations in large batches. This is accomplished by opening a large number |
15 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 16 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
16 tabs to load. This provides two benefits: | 17 tabs to load. This provides two benefits: |
17 - Takes advantage of the high number of logical cores on modern CPUs. | 18 - Takes advantage of the high number of logical cores on modern CPUs. |
(...skipping 11 matching lines...) | |
29 super(FastNavigationProfileExtender, self).__init__(finder_options) | 30 super(FastNavigationProfileExtender, self).__init__(finder_options) |
30 | 31 |
31 # The instance keeps a list of Tabs that can be navigated successfully. | 32 # The instance keeps a list of Tabs that can be navigated successfully. |
32 # This means that the Tab is not crashed, and is processing JavaScript in a | 33 # This means that the Tab is not crashed, and is processing JavaScript in a |
33 # timely fashion. | 34 # timely fashion. |
34 self._navigation_tabs = [] | 35 self._navigation_tabs = [] |
35 | 36 |
36 # The number of tabs to use. | 37 # The number of tabs to use. |
37 self._NUM_TABS = maximum_batch_size | 38 self._NUM_TABS = maximum_batch_size |
38 | 39 |
39 # The amount of time to wait for a batch of pages to finish loading. | 40 # The amount of additional time to wait for a batch of pages to finish |
40 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 41 # loading for each page in the batch. |
42 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS = 20 | |
41 | 43 |
42 # The default amount of time to wait for the retrieval of the URL of a tab. | 44 # The amount of time to wait for a page to quiesce. Some pages will never |
43 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 45 # quiesce. |
46 self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS = 10 | |
44 | 47 |
45 def Run(self): | 48 def Run(self): |
46 """Superclass override.""" | 49 """Superclass override.""" |
47 try: | 50 try: |
48 self.SetUpBrowser() | 51 self.SetUpBrowser() |
49 self._PerformNavigations() | 52 self._PerformNavigations() |
50 finally: | 53 finally: |
51 self.TearDownBrowser() | 54 self.TearDownBrowser() |
55 self.VerifyProfileWasExtended() | |
erikchen
2015/05/05 22:16:45
I'm intentionally omitting my implementation of Ve[...]
nednguyen
2015/05/07 22:39:20
These notes to the code reviewers should also be p[...]
erikchen
2015/05/08 19:47:58
Done.
| |
52 | 56 |
53 def GetUrlIterator(self): | 57 def GetUrlIterator(self): |
54 """Gets URLs for the browser to navigate to. | 58 """Gets URLs for the browser to navigate to. |
55 | 59 |
56 Intended for subclass override. | 60 Intended for subclass override. |
57 | 61 |
58 Returns: | 62 Returns: |
59 An iterator whose elements are urls to be navigated to. | 63 An iterator whose elements are urls to be navigated to. |
60 """ | 64 """ |
61 raise NotImplementedError() | 65 raise NotImplementedError() |
62 | 66 |
63 def ShouldExitAfterBatchNavigation(self): | 67 def ShouldExitAfterBatchNavigation(self): |
64 """Returns a boolean indicating whether profile extension is finished. | 68 """Returns a boolean indicating whether profile extension is finished. |
65 | 69 |
66 Intended for subclass override. | 70 Intended for subclass override. |
67 """ | 71 """ |
68 raise NotImplementedError() | 72 raise NotImplementedError() |
69 | 73 |
70 def CleanUpAfterBatchNavigation(self): | 74 def CleanUpAfterBatchNavigation(self): |
71 """A hook for subclasses to perform cleanup after each batch of | 75 """A hook for subclasses to perform cleanup after each batch of |
72 navigations. | 76 navigations. |
73 | 77 |
74 Can be overridden by subclasses. | 78 Can be overridden by subclasses. |
75 """ | 79 """ |
76 pass | 80 pass |
77 | 81 |
78 def _AddNewTab(self): | 82 def _AddNewTab(self): |
79 """Adds a new tab to the browser.""" | 83 """Adds a new tab to the browser.""" |
80 | 84 self._navigation_tabs.append(self._browser.tabs.New()) |
81 # Adding a new tab requires making a request over devtools. This can fail | |
82 # for a variety of reasons. Retry 3 times. | |
83 retry_count = 3 | |
84 for i in range(retry_count): | |
85 try: | |
86 self._navigation_tabs.append(self._browser.tabs.New()) | |
87 except exceptions.Error: | |
88 if i == retry_count - 1: | |
89 raise | |
90 else: | |
91 break | |
92 | 85 |
93 def _RefreshNavigationTabs(self): | 86 def _RefreshNavigationTabs(self): |
94 """Updates the member self._navigation_tabs to contain self._NUM_TABS | 87 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
95 elements, each of which is not crashed. The crashed tabs are intentionally | 88 elements, each of which is not crashed. The crashed tabs are intentionally |
96 leaked, since Telemetry doesn't have a good way of killing crashed tabs. | 89 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
dtu
2015/05/07 21:50:36
I have a question. What happens if you call tab.Cl[...]
erikchen
2015/05/08 19:47:58
The exception: "DevtoolsTargetCrashException: Devt[...]"
| |
97 | 90 |
98 It is also possible for a tab to be stalled in an infinite JavaScript loop. | 91 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
99 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. | 92 These tabs will be in self.browser.tabs, but not in self._navigation_tabs. |
100 There is no way to kill these tabs, so they are also leaked. This method is | 93 There is no way to kill these tabs, so they are also leaked. This method is |
101 careful to only use tabs in self._navigation_tabs, or newly created tabs. | 94 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
102 """ | 95 """ |
103 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] | 96 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
104 self._navigation_tabs = live_tabs | 97 self._navigation_tabs = live_tabs |
105 | 98 |
106 while len(self._navigation_tabs) < self._NUM_TABS: | 99 while len(self._navigation_tabs) < self._NUM_TABS: |
107 self._AddNewTab() | 100 self._AddNewTab() |
108 | 101 |
109 def _RemoveNavigationTab(self, tab): | 102 def _RemoveNavigationTab(self, tab): |
110 """Removes a tab which is no longer in a useable state from | 103 """Removes a tab which is no longer in a useable state from |
111 self._navigation_tabs. The tab is not removed from self.browser.tabs, | 104 self._navigation_tabs. The tab is not removed from self.browser.tabs, |
112 since there is no guarantee that the tab can be safely removed.""" | 105 since there is no guarantee that the tab can be safely removed.""" |
113 self._navigation_tabs.remove(tab) | 106 self._navigation_tabs.remove(tab) |
114 | 107 |
115 def _RetrieveTabUrl(self, tab, timeout): | 108 def _RetrieveTabUrl(self, tab, timeout): |
116 """Retrives the URL of the tab.""" | 109 """Retrives the URL of the tab.""" |
117 try: | 110 return tab.EvaluateJavaScript('document.URL', timeout) |
dtu
2015/05/07 21:50:36
Does tab.url not work in this case? (tab.url uses [...]
erikchen
2015/05/08 19:47:58
That's a good suggestion - I've added a comment he[...]
| |
118 return tab.EvaluateJavaScript('document.URL', timeout) | |
119 except exceptions.Error: | |
120 return None | |
121 | 111 |
122 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 112 def _WaitForUrlToChange(self, tab, initial_url, end_time): |
123 """Waits for the tab to navigate away from its initial url.""" | 113 """Waits for the tab to navigate away from its initial url. |
124 end_time = time.time() + timeout | 114 |
115 If time.time() is larger than end_time, the function does nothing. | |
116 Otherwise, the function tries to return no later than end_time. | |
117 """ | |
125 while True: | 118 while True: |
126 seconds_to_wait = end_time - time.time() | 119 seconds_to_wait = end_time - time.time() |
127 seconds_to_wait = max(0, seconds_to_wait) | 120 seconds_to_wait = max(0, seconds_to_wait) |
128 | 121 |
129 if seconds_to_wait == 0: | 122 if seconds_to_wait == 0: |
130 break | 123 break |
131 | 124 |
132 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) | 125 current_url = self._RetrieveTabUrl(tab, seconds_to_wait) |
133 if current_url != initial_url: | 126 if current_url != initial_url and current_url != "": |
134 break | 127 break |
135 | 128 |
136 # Retrieving the current url is a non-trivial operation. Add a small | 129 # Retrieving the current url is a non-trivial operation. Add a small |
137 # sleep here to prevent this method from contending with the actual | 130 # sleep here to prevent this method from contending with the actual |
138 # navigation. | 131 # navigation. |
139 time.sleep(0.01) | 132 time.sleep(0.01) |
140 | 133 |
134 def _WaitForTabToBeReady(self, tab, end_time): | |
135 """Waits for the tab to be ready. | |
136 | |
137 If time.time() is larger than end_time, the function does nothing. | |
138 Otherwise, the function tries to return no later than end_time. | |
139 """ | |
140 seconds_to_wait = end_time - time.time() | |
141 seconds_to_wait = max(0, seconds_to_wait) | |
142 if seconds_to_wait == 0: | |
dtu
2015/05/07 21:50:36
Instead of doing max(), why not just check seconds[...]
erikchen
2015/05/08 19:47:58
Done.
| |
143 return | |
144 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | |
145 | |
141 def _BatchNavigateTabs(self, batch): | 146 def _BatchNavigateTabs(self, batch): |
142 """Performs a batch of tab navigations with minimal delay. | 147 """Performs a batch of tab navigations with minimal delay. |
143 | 148 |
144 Args: | 149 Args: |
145 batch: A list of tuples (tab, url). | 150 batch: A list of tuples (tab, url). |
146 | 151 |
147 Returns: | 152 Returns: |
148 A list of tuples (tab, initial_url). |initial_url| is the url of the | 153 A list of tuples (tab, initial_url). |initial_url| is the url of the |
149 |tab| prior to a navigation command being sent to it. | 154 |tab| prior to a navigation command being sent to it. |
150 """ | 155 """ |
151 timeout_in_seconds = 0 | 156 # Attempting to pass in a timeout of 0 seconds results in a synchronous |
157 # socket error from the websocket library. Pass in a very small timeout | |
158 # instead so that the websocket library raises a Timeout exception. This | |
159 # prevents the logic from accidentally catching different socket | |
160 # exceptions. | |
161 timeout_in_seconds = 0.01 | |
152 | 162 |
153 queued_tabs = [] | 163 queued_tabs = [] |
154 for tab, url in batch: | 164 for tab, url in batch: |
155 initial_url = self._RetrieveTabUrl(tab, | 165 initial_url = self._RetrieveTabUrl(tab, 20) |
156 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | |
157 | |
158 try: | 166 try: |
159 tab.Navigate(url, None, timeout_in_seconds) | 167 tab.Navigate(url, None, timeout_in_seconds) |
160 except exceptions.Error: | 168 except exceptions.TimeoutException: |
161 # We expect a time out. It's possible for other problems to arise, but | 169 # We expect to receive a timeout exception, since we're not waiting for |
162 # this method is not responsible for dealing with them. Ignore all | 170 # the navigation to complete. |
163 # exceptions. | |
164 pass | 171 pass |
165 | |
166 queued_tabs.append((tab, initial_url)) | 172 queued_tabs.append((tab, initial_url)) |
167 return queued_tabs | 173 return queued_tabs |
168 | 174 |
169 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 175 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
170 """Waits for all the batch navigated tabs to finish loading. | 176 """Waits for all the batch navigated tabs to finish loading. |
171 | 177 |
172 Args: | 178 Args: |
173 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 179 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
174 to have already been sent a navigation command. | 180 to have already been sent a navigation command. |
175 """ | 181 """ |
176 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 182 total_batch_timeout = (len(queued_tabs) * |
183 self._BATCH_TIMEOUT_PER_PAGE_IN_SECONDS) | |
184 end_time = time.time() + total_batch_timeout | |
177 for tab, initial_url in queued_tabs: | 185 for tab, initial_url in queued_tabs: |
186 # Since we didn't wait any time for the tab url navigation to commit, it's | |
187 # possible that the tab hasn't started navigating yet. | |
188 self._WaitForUrlToChange(tab, initial_url, end_time) | |
189 self._WaitForTabToBeReady(tab, end_time) | |
190 | |
191 # Wait up to 10 seconds for the page to quiesce. If the page hasn't | |
192 # quiesced in 10 seconds, it will probably never quiesce. | |
178 seconds_to_wait = end_time - time.time() | 193 seconds_to_wait = end_time - time.time() |
179 seconds_to_wait = max(0, seconds_to_wait) | 194 seconds_to_wait = max(0, seconds_to_wait) |
180 | 195 seconds_to_wait = min(self._TIME_TO_WAIT_FOR_PAGE_TO_QUIESCE_IN_SECONDS, |
181 if seconds_to_wait == 0: | 196 seconds_to_wait) |
182 break | |
183 | |
184 # Since we don't wait any time for the tab url navigation to commit, it's | |
185 # possible that the tab hasn't started navigating yet. | |
186 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | |
187 | |
188 seconds_to_wait = end_time - time.time() | |
189 seconds_to_wait = max(0, seconds_to_wait) | |
190 | |
191 try: | 197 try: |
192 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | 198 util.WaitFor(lambda tab=tab: tab.HasReachedQuiescence(), |
dtu
2015/05/07 21:50:36
Just lambda: tab.HasReachedQuiescence()
erikchen
2015/05/08 19:47:58
Actually, there's no need for the lambda either.
| |
199 seconds_to_wait) | |
193 except exceptions.TimeoutException: | 200 except exceptions.TimeoutException: |
194 # Ignore time outs. | |
195 pass | 201 pass |
196 except exceptions.Error: | |
197 # If any error occurs, remove the tab. it's probably in an | |
198 # unrecoverable state. | |
199 self._RemoveNavigationTab(tab) | |
200 | 202 |
201 def _GetUrlsToNavigate(self, url_iterator): | 203 def _GetUrlsToNavigate(self, url_iterator): |
202 """Returns an array of urls to navigate to, given a url_iterator.""" | 204 """Returns an array of urls to navigate to, given a url_iterator.""" |
203 urls = [] | 205 urls = [] |
204 for _ in xrange(self._NUM_TABS): | 206 for _ in xrange(self._NUM_TABS): |
205 try: | 207 try: |
206 urls.append(url_iterator.next()) | 208 urls.append(url_iterator.next()) |
207 except StopIteration: | 209 except StopIteration: |
208 break | 210 break |
209 return urls | 211 return urls |
(...skipping 17 matching lines...) | |
227 tab = self._navigation_tabs[i] | 229 tab = self._navigation_tabs[i] |
228 batch.append((tab, url)) | 230 batch.append((tab, url)) |
229 | 231 |
230 queued_tabs = self._BatchNavigateTabs(batch) | 232 queued_tabs = self._BatchNavigateTabs(batch) |
231 self._WaitForQueuedTabsToLoad(queued_tabs) | 233 self._WaitForQueuedTabsToLoad(queued_tabs) |
232 | 234 |
233 self.CleanUpAfterBatchNavigation() | 235 self.CleanUpAfterBatchNavigation() |
234 | 236 |
235 if self.ShouldExitAfterBatchNavigation(): | 237 if self.ShouldExitAfterBatchNavigation(): |
236 break | 238 break |
OLD | NEW |