Chromium Code Reviews

Index: tools/android/loading/sandwich_misc.py
diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_misc.py
index e7b00c625c949a485587678e1e07ff737b6ab1f7..dd057aa65369350bb1fcca58823de3c05a702562 100644
--- a/tools/android/loading/sandwich_misc.py
+++ b/tools/android/loading/sandwich_misc.py
@@ -3,14 +3,24 @@
 # found in the LICENSE file.
 import logging
+import json
+import os
+import chrome_cache
+import common_util
 from loading_trace import LoadingTrace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
+import sandwich_runner
 import wpr_backend
+# Do not prefetch anything.
+EMPTY_CACHE_DISCOVERER = 'empty-cache'
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULL_CACHE_DISCOVERER = 'full-cache'
+
 # Prefetches the first resource following the redirection chain.
 REDIRECTED_MAIN_DISCOVERER = 'redirected-main'
@@ -21,6 +31,8 @@ PARSER_DISCOVERER = 'parser'
 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'
 SUBRESOURCE_DISCOVERERS = set([
+  EMPTY_CACHE_DISCOVERER,
+  FULL_CACHE_DISCOVERER,
   REDIRECTED_MAIN_DISCOVERER,
   PARSER_DISCOVERER,
   HTML_PRELOAD_SCANNER_DISCOVERER
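For orientation, each discoverer name selects a different prefetch simulation in ExtractDiscoverableUrls below. A caller-side guard along these lines (illustrative only, not part of this patch) keeps a misspelled name from silently falling through the elif chain:

assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
    'unknown discoverer: %s' % subresource_discoverer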
@@ -85,7 +97,11 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
-  if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+  if subresource_discoverer == EMPTY_CACHE_DISCOVERER:
+    pass
+  elif subresource_discoverer == FULL_CACHE_DISCOVERER:
+    discovered_requests = trace.request_track.GetEvents()
+  elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
     discovered_requests = \
         [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == PARSER_DISCOVERER:
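A sketch of how the two new discoverers behave when fed to ExtractDiscoverableUrls (the trace path is a placeholder). With empty-cache nothing is discovered, and since the hunk below also stops white-listing the main request unconditionally, the returned whitelist ends up empty; with full-cache every request recorded in the trace is considered discovered:

# Hypothetical usage; 'out/0/trace.json' is a placeholder path.
no_prefetch = ExtractDiscoverableUrls('out/0/trace.json', EMPTY_CACHE_DISCOVERER)
assert no_prefetch == set()

full_prefetch = ExtractDiscoverableUrls('out/0/trace.json', FULL_CACHE_DISCOVERER)
# full_prefetch now holds every http(s) URL requested in the trace.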
@@ -100,7 +116,6 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   # Prune out data:// requests.
   whitelisted_urls = set()
   logging.info('white-listing %s' % first_resource_request.url)
-  whitelisted_urls.add(first_resource_request.url)
   for request in discovered_requests:
     # Work-around where the protocol may be none for an unclear reason yet.
     # TODO(gabadie): Follow up on this with Clovis guys and possibly remove
@@ -114,3 +129,135 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
     logging.info('white-listing %s' % request.url)
     whitelisted_urls.add(request.url)
   return whitelisted_urls
+
+
+def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
+  """Compare URL sets and log the diffs.
+
+  Args:
+    ref_url_set: Set of reference urls.
+    url_set: Set of urls to compare to the reference.
+    url_set_name: The set name for logging purposes.
+  """
+  assert type(ref_url_set) == set
+  assert type(url_set) == set
+  if ref_url_set == url_set:
+    logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+    return
+  logging.error(' %s are not matching.' % url_set_name)
+  logging.error(' List of missing resources:')
+  for url in ref_url_set.difference(url_set):
+    logging.error('- ' + url)
+  logging.error(' List of unexpected resources:')
+  for url in url_set.difference(ref_url_set):
+    logging.error('+ ' + url)
+
+
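A minimal illustration of the helper above, with made-up URLs:

_PrintUrlSetComparison(
    set(['http://a.com/', 'http://a.com/x.js']),   # reference set
    set(['http://a.com/', 'http://a.com/y.css']),  # observed set
    'example resources')
# Logs at error level that 'example resources' are not matching, with
# http://a.com/x.js reported as missing ('-') and http://a.com/y.css as
# unexpected ('+').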
+def _ListUrlRequests(trace, from_cache):
+  """Lists requested URLs from a trace.
+
+  Args:
+    trace: The trace.
+    from_cache:
+      None to list all requested urls;
pasko 2016/04/21 18:21:44: these rules are hard to remember, so the reader wo
gabadie 2016/04/22 14:16:42: Done.
+      True to list all requested urls served from cache;
+      False to list all requested urls not served from cache.
+
+  Returns:
+    set([str])
+  """
+  urls = set()
+  for request_event in trace.request_track.GetEvents():
+    if request_event.protocol == None:
+      continue
+    if not request_event.protocol.startswith('http'):
pasko 2016/04/21 18:21:44: This was not mentioned in the docstring. Does this
gabadie 2016/04/22 14:16:42: That's what this is for.
+      continue
+    if from_cache is not None and request_event.from_disk_cache != from_cache:
+      continue
+    urls.add(request_event.url)
+  return urls
+
+
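The three-state from_cache argument is easiest to remember from a sketch (trace is assumed to be a LoadingTrace, and from_disk_cache is assumed to be strictly boolean on http(s) request events):

all_urls = _ListUrlRequests(trace, from_cache=None)   # every http(s) request
cached   = _ListUrlRequests(trace, from_cache=True)   # served from the disk cache
network  = _ListUrlRequests(trace, from_cache=False)  # fetched over the network
# Under the boolean assumption above, cached and network partition all_urls.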
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
pasko 2016/04/21 18:21:44: Need to apply the same action as for ValidateCache
gabadie 2016/04/22 14:16:42: Acknowledged. But I don't want to block sandwich w
+                                   benchmark_output_directory_path):
+  """Verifies that all runs inside the run_output_directory worked as expected.
+
+  Args:
+    benchmark_setup_path: Path of the JSON of the benchmark setup.
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+  """
+  benchmark_setup = json.load(open(benchmark_setup_path))
+  cache_whitelist = set(benchmark_setup['cache_whitelist'])
+  url_resources = set(benchmark_setup['url_resources'])
+
+  # Verify requests from traces.
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      logging.error('missing trace %s' % trace_path)
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    _PrintUrlSetComparison(url_resources, _ListUrlRequests(trace, None),
+                           'All resources')
+    _PrintUrlSetComparison(url_resources.intersection(cache_whitelist),
+                           _ListUrlRequests(trace, True), 'Cached resources')
+    _PrintUrlSetComparison(url_resources.difference(cache_whitelist),
+                           _ListUrlRequests(trace, False),
+                           'Non cached resources')
+
+
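The loop above implies an output directory of numbered run subdirectories 0, 1, 2, ... each holding a sandwich_runner.TRACE_FILENAME, and a setup JSON carrying at least the two keys read earlier. A hypothetical minimal setup file, written from Python (the file path and URLs are placeholders):

# Hypothetical minimal benchmark setup, matching the keys read above.
benchmark_setup = {
    'cache_whitelist': ['http://a.com/style.css'],
    'url_resources': ['http://a.com/', 'http://a.com/style.css'],
}
json.dump(benchmark_setup, open('benchmark_setup.json', 'w'))  # placeholder path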
+def ListResourceUrls(benchmark_output_directory_path):
pasko 2016/04/21 18:21:44: ReadSubresourceMapFromBenchmarkOutput(...)
gabadie 2016/04/22 14:16:42: Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06: Acknowledged.
| + """Lists all requested URLs per navigated URLs |
|
pasko 2016/04/21 18:21:44: """Extracts a map URL-to-subresources for each nav
gabadie 2016/04/22 14:16:42: Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06: I will keep asking for modifications as many times
+
+  Args:
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+
+  Returns:
+    {url -> [URLs of sub-resources]}
+  """
+  url_subresources = {}
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    if trace.url in url_subresources:
+      continue
+    logging.info('lists resources of %s from %s' % (trace.url, trace_path))
+    urls_set = set()
+    for request_event in trace.request_track.GetEvents():
+      if not request_event.protocol.startswith('http'):
+        continue
+      if request_event.url not in urls_set:
+        logging.info(' %s' % request_event.url)
+        urls_set.add(request_event.url)
+    url_subresources[trace.url] = [url for url in urls_set]
+  return url_subresources
+
+
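A hedged usage sketch of ListResourceUrls (the directory path and URLs are placeholders; the codebase is Python 2, hence iteritems):

subresources = ListResourceUrls('out/benchmark')  # placeholder path
# e.g. {'http://a.com/': ['http://a.com/', 'http://a.com/app.js']}
for navigated_url, resource_urls in subresources.iteritems():
  logging.info('%s loads %d subresources', navigated_url, len(resource_urls))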
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
pasko 2016/04/21 18:21:44: Producing log messages on error is insufficient -
gabadie 2016/04/22 14:16:42: I don't want to block sandwich workflow because of
pasko 2016/04/25 13:29:06: nit: In browser development the term XMLHttpReques
gabadie 2016/04/27 08:32:16: Acknowledged.
| + """Validates a cache archive content. |
| + |
| + Args: |
| + ref_urls: Reference list of urls. |
| + cache_archive_path: Cache archive's path to validate. |
| + """ |
| + logging.info('lists cached urls from %s' % cache_archive_path) |
| + with common_util.TemporaryDirectory() as cache_directory: |
| + chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| + cached_urls = \ |
| + chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() |
| + _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') |
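Putting it together, one plausible way to chain the helpers above (both paths and the URL are placeholders):

resource_map = ListResourceUrls('out/benchmark')        # placeholder path
ref_urls = resource_map['http://a.com/']                # placeholder URL
ValidateCacheArchiveContent(ref_urls, 'out/cache.zip')  # placeholder archive
# Matches and mismatches are reported through _PrintUrlSetComparison's logging.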