Index: tools/perf/profile_creators/fast_navigation_profile_extender.py |
diff --git a/tools/perf/profile_creators/fast_navigation_profile_extender.py b/tools/perf/profile_creators/fast_navigation_profile_extender.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3d0bb783d86e9e7e1ecf655c15dfa742ce8c0b3d |
--- /dev/null |
+++ b/tools/perf/profile_creators/fast_navigation_profile_extender.py |
@@ -0,0 +1,188 @@ |
+# Copyright 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+import time |
+ |
+from telemetry.core import browser_finder |
+from telemetry.core import browser_finder_exceptions |
+from telemetry.core import exceptions |
+from telemetry.core import util |
+ |
+ |
+class FastNavigationProfileExtender(object): |
+ """ |
+ This class creates a profile, or extends an existing one, by performing a set |
+ of tab navigations in large batches. This is accomplished by opening a large |
+ number of tabs, simultaneously navigating all the tabs, and then waiting for |
+ all the tabs to load. This provides two benefits: |
+ - Takes advantage of the high number of logical cores on modern CPUs. |
+ - The total time spent waiting for navigations to time out scales linearly |
+ with the number of batches, but does not scale with the size of the |
+ batch. |
nednguyen
2015/02/12 02:10:53
Style nits: please make this docstring and others
erikchen
2015/02/12 03:02:58
Done. I've updated all the formatting to be PEP co
|
+ """ |
+ def __init__(self): |
+ super(FastNavigationProfileExtender, self).__init__() |
+ |
+ # A reference to the browser that will be performing all of the tab |
+ # navigations. |
+ self._browser = None |
+ |
+ # A static copy of the urls that this class is going to navigate to. |
+ self._navigation_urls = None |
+ |
+ # The number of tabs to use. |
+ self._NUM_TABS = 15 |
+ |
+ # The number of pages to load in parallel. |
+ self._NUM_PARALLEL_PAGES = 15 |
+ |
+ # It doesn't make sense for the batch size to be larger than the number of |
+ # available tabs. |
nednguyen
2015/02/12 02:10:53
Style nits: rather than comments, you can do:
asse
erikchen
2015/02/12 03:02:58
Good point, done.
|
+ assert(self._NUM_PARALLEL_PAGES <= self._NUM_TABS) |
+ |
+ # The amount of time to wait for pages to finish loading. |
+ self._PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 |
+ |
+ # The amount of time to wait for the retrieval of the URL of a tab. |
+ self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 |
+ |
+ # The amount of time to wait for a navigation to be committed. |
+ self._NAVIGATION_COMMIT_WAIT_IN_SECONDS = 0.1 |
+ |
+ def Run(self, finder_options): |
+ """ |
+ |finder_options| contains the directory of the input profile, the directory |
+ to place the output profile, and sufficient information to choose a specific |
+ browser binary. |
+ """ |
+ try: |
+ self._navigation_urls = self.GetUrlsToNavigate() |
+ self._SetUpBrowser(finder_options) |
+ self._PerformNavigations() |
+ finally: |
+ self._TearDownBrowser() |
+ |
+ def GetUrlsToNavigate(self): |
+ """ |
+ Intended for subclass override. Returns a list of urls to be navigated to. |
+ """ |
+ raise NotImplementedError() |
+ |
+ |
+ def _GetPossibleBrowser(self, finder_options): |
+ """Return a possible_browser with the given options.""" |
+ possible_browser = browser_finder.FindBrowser(finder_options) |
+ if not possible_browser: |
+ raise browser_finder_exceptions.BrowserFinderException( |
+ 'No browser found.\n\nAvailable browsers:\n%s\n' % |
+ '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options))) |
+ finder_options.browser_options.browser_type = ( |
+ possible_browser.browser_type) |
+ |
+ return possible_browser |
+ |
+ def _RetrieveTabUrl(self, tab): |
+ """Retrieves the URL of the tab.""" |
+ try: |
+ return tab.EvaluateJavaScript('document.URL', |
+ self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) |
+ except exceptions.DevtoolsTargetCrashException: |
+ return None |
+ |
+ def _BatchNavigateTabs(self, batch): |
+ """ |
+ Performs a batch of tab navigations with minimal delay. |
+ |
+ |batch| is a list of tuples (tab, url). |
+ |
+ This method returns a list of tuples (tab, initial_url). |initial_url| is |
+ the url of the |tab| prior to a navigation command being sent to it. |
+ """ |
+ timeout_in_seconds = 0 |
+ |
+ queued_tabs = [] |
+ for tab, url in batch: |
+ initial_url = self._RetrieveTabUrl(tab) |
+ |
+ try: |
+ tab.Navigate(url, None, timeout_in_seconds) |
+ except exceptions.DevtoolsTargetCrashException: |
+ # We expect a timeout, and don't mind if the webpage crashes. Ignore the |
+ # exception in either case. |
nednguyen
2015/02/12 02:10:53
Should we close crashed tabs & reopen new tabs?
erikchen
2015/02/12 02:26:07
Yes. But there's a whole can of worms that's going
nednguyen
2015/02/12 03:17:59
SGTM.
We have a filed bug for that one: https://co
|
+ pass |
+ |
+ queued_tabs.append((tab, initial_url)) |
+ return queued_tabs |
+ |
+ def _WaitForQueuedTabsToLoad(self, queued_tabs): |
+ """ |
+ Waits for all the batch navigated tabs to finish loading. |
+ |
+ |queued_tabs| is a list of tuples (tab, initial_url). Each tab is |
+ guaranteed to have already been sent a navigation command. |
+ """ |
+ end_time = time.time() + self._PAGE_LOAD_TIMEOUT_IN_SECONDS |
nednguyen
2015/02/12 02:10:53
Based on the implementation below, it looks like _
erikchen
2015/02/12 02:26:07
Yes. One of the major points of this class is to b
nednguyen
2015/02/12 03:17:59
Then why not timeout after each page after 1.5 sec
erikchen
2015/02/12 03:24:18
Because most pages requires longer than 1.5 second
nednguyen
2015/02/12 03:29:53
Ok, this is legit. Maybe rename _PAGE_LOAD_TIMEOUT
|
+ for tab, initial_url in queued_tabs: |
+ seconds_to_wait = end_time - time.time() |
+ seconds_to_wait = max(0, seconds_to_wait) |
+ |
+ if seconds_to_wait == 0: |
+ break |
+ |
+ # Since we don't wait any time for the tab url navigation to commit, it's |
+ # possible that the tab hasn't started navigating yet. |
+ current_url = self._RetrieveTabUrl(tab) |
+ |
+ if current_url == initial_url: |
+ # If the navigation hasn't been committed yet, wait a small amount of |
+ # time. Don't bother rechecking the condition, since it's also possible |
+ # that the web page isn't processing javascript. |
+ time.sleep(self._NAVIGATION_COMMIT_WAIT_IN_SECONDS) |
nednguyen
2015/02/12 02:10:53
I think you should use
tab.WaitForNavigate(timeout
erikchen
2015/02/12 02:26:07
That doesn't work. WaitForNavigate assumes that a
nednguyen
2015/02/12 03:17:59
Interesting. The case "navigation has already fini
erikchen
2015/02/12 03:24:18
I spoke too quickly - you're right. Navigation hasn
|
+ |
+ try: |
+ tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) |
+ except (util.TimeoutException, exceptions.DevtoolsTargetCrashException): |
+ # Ignore time outs and web page crashes. |
+ pass |
+ |
+ def _SetUpBrowser(self, finder_options): |
+ """ |
+ Finds the browser, starts the browser, and opens the requisite number of |
+ tabs. |
+ """ |
+ possible_browser = self._GetPossibleBrowser(finder_options) |
+ self._browser = possible_browser.Create(finder_options) |
+ |
+ for _ in range(self._NUM_TABS): |
+ self._browser.tabs.New() |
+ |
+ def _PerformNavigations(self): |
+ """ |
+ Performs the navigations specified by |_navigation_urls| in large batches. |
+ """ |
+ # The index of the first url that has not yet been navigated to. |
+ navigation_url_index = 0 |
+ while True: |
+ # Generate the next batch of navigations. |
+ batch = [] |
+ max_index = min(navigation_url_index + self._NUM_PARALLEL_PAGES, |
+ len(self._navigation_urls)) |
+ for i in range(navigation_url_index, max_index): |
+ url = self._navigation_urls[i] |
+ tab = self._browser.tabs[i % self._NUM_TABS] |
+ batch.append((tab, url)) |
+ navigation_url_index = max_index |
+ |
+ queued_tabs = self._BatchNavigateTabs(batch) |
+ self._WaitForQueuedTabsToLoad(queued_tabs) |
+ |
+ if navigation_url_index == len(self._navigation_urls): |
+ break |
+ |
+ def _TearDownBrowser(self): |
+ """ |
+ Teardown that is guaranteed to be executed before the instance is destroyed. |
+ """ |
+ if self._browser: |
+ self._browser.Close() |
+ self._browser = None |