Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_misc.py

Issue 1872313002: sandwich: Implement SandwichTaskBuilder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addresses pasko's comments Created 4 years, 8 months ago
Index: tools/android/loading/sandwich_misc.py
diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_misc.py
index e7b00c625c949a485587678e1e07ff737b6ab1f7..dd057aa65369350bb1fcca58823de3c05a702562 100644
--- a/tools/android/loading/sandwich_misc.py
+++ b/tools/android/loading/sandwich_misc.py
@@ -3,14 +3,24 @@
# found in the LICENSE file.
import logging
+import json
+import os
+import chrome_cache
+import common_util
from loading_trace import LoadingTrace
from prefetch_view import PrefetchSimulationView
from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
+import sandwich_runner
import wpr_backend
+# Do not prefetch anything.
+EMPTY_CACHE_DISCOVERER = 'empty-cache'
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULL_CACHE_DISCOVERER = 'full-cache'
+
# Prefetches the first resource following the redirection chain.
REDIRECTED_MAIN_DISCOVERER = 'redirected-main'
@@ -21,6 +31,8 @@ PARSER_DISCOVERER = 'parser'
HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'
SUBRESOURCE_DISCOVERERS = set([
+ EMPTY_CACHE_DISCOVERER,
+ FULL_CACHE_DISCOVERER,
REDIRECTED_MAIN_DISCOVERER,
PARSER_DISCOVERER,
HTML_PRELOAD_SCANNER_DISCOVERER
@@ -85,7 +97,11 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
# Build the list of discovered requests according to the desired simulation.
discovered_requests = []
- if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+ if subresource_discoverer == EMPTY_CACHE_DISCOVERER:
+ pass
+ elif subresource_discoverer == FULL_CACHE_DISCOVERER:
+ discovered_requests = trace.request_track.GetEvents()
+ elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
discovered_requests = \
[dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
elif subresource_discoverer == PARSER_DISCOVERER:
@@ -100,7 +116,6 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
# Prune out data:// requests.
whitelisted_urls = set()
logging.info('white-listing %s' % first_resource_request.url)
- whitelisted_urls.add(first_resource_request.url)
for request in discovered_requests:
# Work-around where the protocol may be none for an unclear reason yet.
# TODO(gabadie): Follow up on this with Clovis guys and possibly remove
@@ -114,3 +129,135 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
logging.info('white-listing %s' % request.url)
whitelisted_urls.add(request.url)
return whitelisted_urls
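For context (not part of the patch set): a minimal sketch of how the new discoverer constants drive ExtractDiscoverableUrls; the trace path below is hypothetical.

    # EMPTY_CACHE_DISCOVERER yields an empty whitelist; FULL_CACHE_DISCOVERER
    # whitelists every http(s) request recorded in the trace.
    trace_path = 'out/benchmark/0/trace.json'  # hypothetical path
    assert FULL_CACHE_DISCOVERER in SUBRESOURCE_DISCOVERERS
    whitelisted_urls = ExtractDiscoverableUrls(trace_path, FULL_CACHE_DISCOVERER)
    for url in sorted(whitelisted_urls):
      logging.info('would prefetch %s', url)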
+
+
+def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
+ """Compare URL sets and log the diffs.
+
+ Args:
+ ref_url_set: Set of reference urls.
+ url_set: Set of urls to compare to the reference.
+ url_set_name: The set name for logging purposes.
+ """
+ assert type(ref_url_set) == set
+ assert type(url_set) == set
+ if ref_url_set == url_set:
+ logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+ return
+ logging.error(' %s are not matching.' % url_set_name)
+ logging.error(' List of missing resources:')
+ for url in ref_url_set.difference(url_set):
+ logging.error('- ' + url)
+ logging.error(' List of unexpected resources:')
+ for url in url_set.difference(ref_url_set):
+ logging.error('+ ' + url)
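A small illustration of the helper's logging contract (URLs made up):

    _PrintUrlSetComparison(
        ref_url_set=set(['http://a.test/', 'http://a.test/app.js']),
        url_set=set(['http://a.test/']),
        url_set_name='All resources')
    # Logs 'All resources are not matching.' and '- http://a.test/app.js' as a
    # missing resource; identical sets would log the matching count instead.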
+
+
+def _ListUrlRequests(trace, from_cache):
+ """Lists requested URLs from a trace.
+
+ Args:
+ trace: The trace.
+ from_cache:
+ None to list all requested urls;
pasko 2016/04/21 18:21:44 these rules are hard to remember, so the reader wo
gabadie 2016/04/22 14:16:42 Done.
+ True to list all requested urls served from cache;
+      False to list all requested urls not served from cache.
+
+ Returns:
+ set([str])
+ """
+ urls = set()
+ for request_event in trace.request_track.GetEvents():
+ if request_event.protocol == None:
+ continue
+ if not request_event.protocol.startswith('http'):
pasko 2016/04/21 18:21:44 This was not mentioned in the docstring. Does this
gabadie 2016/04/22 14:16:42 This is what this is for.
+ continue
+ if from_cache is not None and request_event.from_disk_cache != from_cache:
+ continue
+ urls.add(request_event.url)
+ return urls
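A sketch of how the from_cache switch partitions a trace's requests, assuming trace is a LoadingTrace (e.g. loaded with LoadingTrace.FromJsonFile):

    all_urls = _ListUrlRequests(trace, from_cache=None)       # every http(s) request
    cached_urls = _ListUrlRequests(trace, from_cache=True)    # served from the disk cache
    network_urls = _ListUrlRequests(trace, from_cache=False)  # fetched over the network
    # When from_disk_cache is a plain boolean, the two subsets partition all_urls.
    _PrintUrlSetComparison(all_urls, cached_urls | network_urls, 'All resources')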
+
+
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
pasko 2016/04/21 18:21:44 Need to apply the same action as for ValidateCache
gabadie 2016/04/22 14:16:42 Acknowledged. But I don't want to block sandwich w
+ benchmark_output_directory_path):
+ """Verifies that all run inside the run_output_directory worked as expected.
+
+ Args:
+ benchmark_setup_path: Path of the JSON of the benchmark setup.
+ benchmark_output_directory_path: Path of the benchmark output directory to
+ verify.
+ """
+ benchmark_setup = json.load(open(benchmark_setup_path))
+ cache_whitelist = set(benchmark_setup['cache_whitelist'])
+ url_resources = set(benchmark_setup['url_resources'])
+
+ # Verify requests from traces.
+ run_id = -1
+ while True:
+ run_id += 1
+ run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+ if not os.path.isdir(run_path):
+ break
+ trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+ if not os.path.isfile(trace_path):
+ logging.error('missing trace %s' % trace_path)
+ continue
+ trace = LoadingTrace.FromJsonFile(trace_path)
+ logging.info('verifying %s from %s' % (trace.url, trace_path))
+ _PrintUrlSetComparison(url_resources, _ListUrlRequests(trace, None),
+ 'All resources')
+ _PrintUrlSetComparison(url_resources.intersection(cache_whitelist),
+ _ListUrlRequests(trace, True), 'Cached resources')
+ _PrintUrlSetComparison(url_resources.difference(cache_whitelist),
+ _ListUrlRequests(trace, False),
+ 'Non cached resources')
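A hedged sketch of the inputs this verification expects; only the two keys read above are shown and every path/URL is made up:

    # benchmark_setup.json (hypothetical content):
    #   {
    #     "cache_whitelist": ["https://site.test/app.css"],
    #     "url_resources": ["https://site.test/", "https://site.test/app.css"]
    #   }
    # The output directory holds one numbered sub-directory per run, each with a
    # sandwich_runner.TRACE_FILENAME trace inside.
    VerifyBenchmarkOutputDirectory('benchmark_setup.json', 'out/benchmark')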
+
+
+def ListResourceUrls(benchmark_output_directory_path):
pasko 2016/04/21 18:21:44 ReadSubresourceMapFromBenchmarkOutput(...)
gabadie 2016/04/22 14:16:42 Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06 Acknowledged.
+ """Lists all requested URLs per navigated URLs
pasko 2016/04/21 18:21:44 """Extracts a map URL-to-subresources for each nav
gabadie 2016/04/22 14:16:42 Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06 I will keep asking for modifications as many times
+
+ Args:
+ benchmark_output_directory_path: Path of the benchmark output directory to
+ verify.
+
+ Returns:
+ {url -> [URLs of sub-resources]}
+ """
+ url_subresources = {}
+ run_id = -1
+ while True:
+ run_id += 1
+ run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+ if not os.path.isdir(run_path):
+ break
+ trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+ if not os.path.isfile(trace_path):
+ continue
+ trace = LoadingTrace.FromJsonFile(trace_path)
+ if trace.url in url_subresources:
+ continue
+ logging.info('lists resources of %s from %s' % (trace.url, trace_path))
+ urls_set = set()
+ for request_event in trace.request_track.GetEvents():
+ if not request_event.protocol.startswith('http'):
+ continue
+ if request_event.url not in urls_set:
+ logging.info(' %s' % request_event.url)
+ urls_set.add(request_event.url)
+ url_subresources[trace.url] = [url for url in urls_set]
+ return url_subresources
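A usage sketch of the returned mapping (the output directory path is hypothetical):

    url_subresources = ListResourceUrls('out/benchmark')
    for navigated_url, subresource_urls in url_subresources.items():
      logging.info('%s loads %d subresources', navigated_url, len(subresource_urls))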
+
+
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
pasko 2016/04/21 18:21:44 Producing log messages on error is insufficient -
gabadie 2016/04/22 14:16:42 I don't want to block sandwich workflow because of
pasko 2016/04/25 13:29:06 nit: In browser development the term XMLHttpReques
gabadie 2016/04/27 08:32:16 Acknowledged.
+ """Validates a cache archive content.
+
+ Args:
+ ref_urls: Reference list of urls.
+ cache_archive_path: Cache archive's path to validate.
+ """
+ logging.info('lists cached urls from %s' % cache_archive_path)
+ with common_util.TemporaryDirectory() as cache_directory:
+ chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
+ cached_urls = \
+ chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
+ _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources')
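A sketch tying the helpers together; the paths are hypothetical and assume a cache archive produced by an earlier sandwich run. Note that mismatches are only logged, per the review discussion above.

    ref_urls = ListResourceUrls('out/benchmark').get('https://site.test/', [])
    ValidateCacheArchiveContent(ref_urls, 'out/benchmark/cache.zip')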
