Chromium Code Reviews| Index: tools/android/loading/sandwich_misc.py |
| diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_misc.py |
| index 13988c3fcf7c59547144188b8285619bb74a19bb..7068aeb985538fe332f6db0223827727db2874ac 100644 |
| --- a/tools/android/loading/sandwich_misc.py |
| +++ b/tools/android/loading/sandwich_misc.py |
| @@ -3,24 +3,35 @@ |
| # found in the LICENSE file. |
| import logging |
| +import json |
| +import os |
| +import chrome_cache |
| +import common_util |
| from loading_trace import LoadingTrace |
| from prefetch_view import PrefetchSimulationView |
| from request_dependencies_lens import RequestDependencyLens |
| -from user_satisfied_lens import FirstContentfulPaintLens |
| import wpr_backend |
| +# Do not prefetch anything. |
| +NO_DISCOVERER = 'no-discoverer' |
| + |
| +# Prefetches everything to load fully from cache (impossible in practice). |
| +FULL_CACHE_DISCOVERER = 'full-cache' |
| + |
| # Prefetches the first resource following the redirection chain. |
| REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| # All resources which are fetched from the main document and their redirections. |
| -PARSER_DISCOVERER = 'parser', |
| +PARSER_DISCOVERER = 'parser' |
| # Simulation of HTMLPreloadScanner on the main document and their redirections. |
| -HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner', |
| +HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
| SUBRESOURCE_DISCOVERERS = set([ |
| + NO_DISCOVERER, |
| + FULL_CACHE_DISCOVERER, |
| REDIRECTED_MAIN_DISCOVERER, |
| PARSER_DISCOVERER, |
| HTML_PRELOAD_SCANNER_DISCOVERER |
| @@ -85,7 +96,11 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer): |
| # Build the list of discovered requests according to the desired simulation. |
| discovered_requests = [] |
| - if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: |
| + if subresource_discoverer == NO_DISCOVERER: |
| + pass |
| + elif subresource_discoverer == FULL_CACHE_DISCOVERER: |
| + discovered_requests = trace.request_track.GetEvents() |
| + elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: |
| discovered_requests = \ |
| [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| elif subresource_discoverer == PARSER_DISCOVERER: |
| @@ -100,7 +115,6 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer): |
| # Prune out data:// requests. |
| whitelisted_urls = set() |
| logging.info('white-listing %s' % first_resource_request.url) |
| - whitelisted_urls.add(first_resource_request.url) |
| for request in discovered_requests: |
| # Work-around where the protocol may be none for an unclear reason yet. |
| # TODO(gabadie): Follow up on this with Clovis guys and possibly remove |
| @@ -114,3 +128,127 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer): |
| logging.info('white-listing %s' % request.url) |
| whitelisted_urls.add(request.url) |
| return whitelisted_urls |
| + |
| + |
| +def CompareUrlSet(ref_url_set, url_set, url_set_name, debug_hint='Good luck!'): |
|
pasko
2016/04/18 09:36:12
I find this debug hint to be slightly offensive :)
pasko
2016/04/18 09:36:13
why is this function in sandwich_misc? is it going
pasko
2016/04/18 09:36:13
more intuitive name: PrintUrlSetComparison
A func
pasko
2016/04/18 09:36:13
not used outside sandwich_misc -> _Compare...
gabadie
2016/04/19 17:39:48
Done.
gabadie
2016/04/19 17:39:48
I'm not sure I understand. It is only used in this file.
gabadie
2016/04/19 17:39:48
Done.
gabadie
2016/04/19 17:39:49
Done.
|
| + """Compare URL sets |
| + |
|
pasko
2016/04/18 09:36:12
need to explain what the function prints
gabadie
2016/04/19 17:39:49
Done.
|
| + Args: |
| + ref_url_set: Set of reference urls. |
| + url_set: Set of urls to compare to the reference. |
| + url_set_name: The set name for logging purposes. |
| + debug_hint: A debug hint to help debugging in any case the sets are |
| + different. |
| + """ |
| + assert type(ref_url_set) == set |
| + assert type(url_set) == set |
| + if ref_url_set == url_set: |
| + logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name)) |
|
pasko
2016/04/18 09:36:13
why % formatting here and {} formatting in other p
gabadie
2016/04/19 17:39:49
There is an annoying check in pylint to force the
|
| + return |
| + logging.error(' %s are not matching.' % url_set_name) |
| + logging.error(' Hint: ' + debug_hint) |
| + logging.error(' List of missing resources:') |
| + for url in ref_url_set.difference(url_set): |
| + logging.error('- ' + url) |
| + logging.error(' List of unexpected resources:') |
| + for url in url_set.difference(ref_url_set): |
| + logging.error('+ ' + url) |
| + |
| + |
| +def _ListUrlRequests(trace, from_cache=None): |
|
pasko
2016/04/18 09:36:13
default arguments that can be True, False and None
gabadie
2016/04/19 17:39:48
Done.
|
| + urls = set() |
| + for request_event in trace.request_track.GetEvents(): |
| + if request_event.protocol == None: |
| + continue |
| + if not request_event.protocol.startswith('http'): |
| + continue |
| + if from_cache is not None and request_event.from_disk_cache != from_cache: |
| + continue |
| + urls.add(request_event.url) |
| + return urls |
| + |
| + |
| +def VerifyBenchmarkOutputDirectory(benchmark_setup_path, |
| + benchmark_output_directory_path): |
| + """Verifies that all run inside the run_output_directory worked as expected. |
| + |
| + Args: |
| + benchmark_setup_path: Path of the JSON of the benchmark setup. |
|
pasko
2016/04/18 09:36:13
what is a 'benchmark setup'?
gabadie
2016/04/19 17:39:48
See SetupBenchmark in sandwich_task_builder.py
|
| + benchmark_output_directory_path: Path of the benchmark output directory to |
| + verify. |
| + """ |
| + benchmark_setup = json.load(open(benchmark_setup_path)) |
| + cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| + url_resources = set(benchmark_setup['url_resources']) |
| + |
| + # Verify requests from traces. |
| + run_id = -1 |
| + while True: |
| + run_id += 1 |
| + run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| + if not os.path.isdir(run_path): |
| + break |
| + trace_path = os.path.join(run_path, 'trace.json') |
|
pasko
2016/04/18 09:36:13
'trace.json' exists in many files, consider making
gabadie
2016/04/19 17:39:48
Nope. trace.json is sandwich-specific. I don't see
|
| + if not os.path.isfile(trace_path): |
| + logging.error('missing trace %s' % trace_path) |
| + continue |
| + trace = LoadingTrace.FromJsonFile(trace_path) |
| + logging.info('verifying %s from %s' % (trace.url, trace_path)) |
| + CompareUrlSet(url_resources, _ListUrlRequests(trace), 'All resources', |
|
pasko
2016/04/18 09:36:13
explicit second arg for _ListUrlRequests is needed
gabadie
2016/04/19 17:39:48
Done.
|
| + 'You may have an issue with an AJAX requests.') |
|
pasko
2016/04/18 09:36:13
I think one can easily tell this by looking at URL
gabadie
2016/04/19 17:39:48
Done.
|
| + CompareUrlSet(url_resources.intersection(cache_whitelist), |
| + _ListUrlRequests(trace, True), 'Cached resources', |
| + 'The WPR archive patcher may have an invalidation issue.') |
|
pasko
2016/04/18 09:36:13
It's slightly funny to see gabadie@ providing hint
gabadie
2016/04/19 17:39:48
The point is to give a hint to users who wanted to r
|
| + CompareUrlSet(url_resources.difference(cache_whitelist), |
| + _ListUrlRequests(trace, False), 'Non cached resources') |
| + |
| + |
| +def ListResourcesUrls(benchmark_output_directory_path): |
|
pasko
2016/04/18 09:36:13
ListResourceUrls
gabadie
2016/04/19 17:39:48
Done.
|
| + """Lists all requested urls per navigated urls |
|
pasko
2016/04/18 09:36:12
s/urls/URLs/
gabadie
2016/04/19 17:39:48
Done.
|
| + |
| + Args: |
| + benchmark_output_directory_path: Path of the benchmark output directory to |
| + verify. |
| + |
| + Returns: |
| + {url -> [urls of sub-resources]} |
| + """ |
| + url_subresources = {} |
| + run_id = -1 |
| + while True: |
| + run_id += 1 |
| + run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| + if not os.path.isdir(run_path): |
| + break |
| + trace_path = os.path.join(run_path, 'trace.json') |
| + if not os.path.isfile(trace_path): |
| + continue |
| + trace = LoadingTrace.FromJsonFile(trace_path) |
| + if trace.url in url_subresources: |
| + continue |
| + logging.info('lists resources of %s from %s' % (trace.url, trace_path)) |
| + urls_set = set() |
| + for request_event in trace.request_track.GetEvents(): |
| + if not request_event.protocol.startswith('http'): |
| + continue |
| + if request_event.url not in urls_set: |
| + logging.info(' %s' % request_event.url) |
| + urls_set.add(request_event.url) |
| + url_subresources[trace.url] = [url for url in urls_set] |
| + return url_subresources |
| + |
| + |
| +def ValidateCacheArchiveContent(ref_urls, cache_archive_path): |
| + """Validates a cache archive content. |
| + |
| + Args: |
| + ref_urls: Reference list of urls. |
| + cache_archive_path: Cache archive's path to validate. |
| + """ |
| + logging.info('lists cached urls from %s' % cache_archive_path) |
| + with common_util.TemporaryDirectory() as cache_directory: |
| + chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| + cached_urls = \ |
| + chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() |
| + CompareUrlSet(set(ref_urls), set(cached_urls), 'cached resources', |
| + debug_hint='Looks like a response header needs to be patched.') |