tools/android/loading/sandwich_misc.py - Issue 1872313002: sandwich: Implement SandwichTaskBuilder

Unified Diff: tools/android/loading/sandwich_misc.py

Issue 1872313002: sandwich: Implement SandwichTaskBuilder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: mv sandwich_tasks.py -> sandwich_task_builder.py Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« tools/android/loading/sandwich.py ('K') | « tools/android/loading/sandwich.py ('k') | tools/android/loading/sandwich_misc_unittest.py » ('j') | tools/android/loading/sandwich_misc_unittest.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tools/android/loading/sandwich_misc.py

diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_misc.py

index 13988c3fcf7c59547144188b8285619bb74a19bb..7068aeb985538fe332f6db0223827727db2874ac 100644

--- a/tools/android/loading/sandwich_misc.py

+++ b/tools/android/loading/sandwich_misc.py

@@ -3,24 +3,35 @@

# found in the LICENSE file.

import logging

+import json

+import os

+import chrome_cache

+import common_util

from loading_trace import LoadingTrace

from prefetch_view import PrefetchSimulationView

from request_dependencies_lens import RequestDependencyLens

-from user_satisfied_lens import FirstContentfulPaintLens

import wpr_backend

+# Do not prefetch anything.

+NO_DISCOVERER = 'no-discoverer'

+# Prefetches everything to load fully from cache (impossible in practice).

+FULL_CACHE_DISCOVERER = 'full-cache'

# Prefetches the first resource following the redirection chain.

REDIRECTED_MAIN_DISCOVERER = 'redirected-main'

# All resources which are fetched from the main document and their redirections.

-PARSER_DISCOVERER = 'parser',

+PARSER_DISCOVERER = 'parser'

# Simulation of HTMLPreloadScanner on the main document and their redirections.

-HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner',

+HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'

SUBRESOURCE_DISCOVERERS = set([

+ NO_DISCOVERER,

+ FULL_CACHE_DISCOVERER,

REDIRECTED_MAIN_DISCOVERER,

PARSER_DISCOVERER,

HTML_PRELOAD_SCANNER_DISCOVERER

@@ -85,7 +96,11 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):

# Build the list of discovered requests according to the desired simulation.

discovered_requests = []

- if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:

+ if subresource_discoverer == NO_DISCOVERER:

+ pass

+ elif subresource_discoverer == FULL_CACHE_DISCOVERER:

+ discovered_requests = trace.request_track.GetEvents()

+ elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:

discovered_requests = \

[dependencies_lens.GetRedirectChain(first_resource_request)[-1]]

elif subresource_discoverer == PARSER_DISCOVERER:

@@ -100,7 +115,6 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):

# Prune out data:// requests.

whitelisted_urls = set()

logging.info('white-listing %s' % first_resource_request.url)

- whitelisted_urls.add(first_resource_request.url)

for request in discovered_requests:

# Work-around where the protocol may be none for an unclear reason yet.

# TODO(gabadie): Follow up on this with Clovis guys and possibly remove

@@ -114,3 +128,127 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):

logging.info('white-listing %s' % request.url)

whitelisted_urls.add(request.url)

return whitelisted_urls

+def CompareUrlSet(ref_url_set, url_set, url_set_name, debug_hint='Good luck!'):

pasko 2016/04/18 09:36:12 I find this debug hint to be slightly offensive :)

pasko 2016/04/18 09:36:13 why is this function in sandwich_misc? is it going

pasko 2016/04/18 09:36:13 more intuitive name: PrintUrlSetComparison A func

pasko 2016/04/18 09:36:13 not used outside sandwich_misc -> _Compare...

gabadie 2016/04/19 17:39:48 Done.

gabadie 2016/04/19 17:39:48 Not sure I understand. It is used only in this file.

gabadie 2016/04/19 17:39:48 Done.

gabadie 2016/04/19 17:39:49 Done.

+ """Compare URL sets

pasko 2016/04/18 09:36:12 need to explain what the function prints

gabadie 2016/04/19 17:39:49 Done.

+ Args:

+ ref_url_set: Set of reference urls.

+ url_set: Set of urls to compare to the reference.

+ url_set_name: The set name for logging purposes.

+ debug_hint: A debug hint to help debugging in any case the sets are

+ different.

+ """

+ assert type(ref_url_set) == set

+ assert type(url_set) == set

+ if ref_url_set == url_set:

+ logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))

pasko 2016/04/18 09:36:13 why % formatting here and {} formatting in other p

gabadie 2016/04/19 17:39:49 There is an annoying check in pylint to force the

+ return

+ logging.error(' %s are not matching.' % url_set_name)

+ logging.error(' Hint: ' + debug_hint)

+ logging.error(' List of missing resources:')

+ for url in ref_url_set.difference(url_set):

+ logging.error('- ' + url)

+ logging.error(' List of unexpected resources:')

+ for url in url_set.difference(ref_url_set):

+ logging.error('+ ' + url)

+def _ListUrlRequests(trace, from_cache=None):

pasko 2016/04/18 09:36:13 default arguments that can be True, False and None

gabadie 2016/04/19 17:39:48 Done.

+ urls = set()

+ for request_event in trace.request_track.GetEvents():

+ if request_event.protocol == None:

+ continue

+ if not request_event.protocol.startswith('http'):

+ continue

+ if from_cache is not None and request_event.from_disk_cache != from_cache:

+ continue

+ urls.add(request_event.url)

+ return urls

+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,

+ benchmark_output_directory_path):

+ """Verifies that all run inside the run_output_directory worked as expected.

+ Args:

+ benchmark_setup_path: Path of the JSON of the benchmark setup.

pasko 2016/04/18 09:36:13 what is a 'benchmark setup'?

gabadie 2016/04/19 17:39:48 See SetupBenchmark in sandwich_task_builder.py

+ benchmark_output_directory_path: Path of the benchmark output directory to

+ verify.

+ """

+ benchmark_setup = json.load(open(benchmark_setup_path))

+ cache_whitelist = set(benchmark_setup['cache_whitelist'])

+ url_resources = set(benchmark_setup['url_resources'])

+ # Verify requests from traces.

+ run_id = -1

+ while True:

+ run_id += 1

+ run_path = os.path.join(benchmark_output_directory_path, str(run_id))

+ if not os.path.isdir(run_path):

+ break

+ trace_path = os.path.join(run_path, 'trace.json')

pasko 2016/04/18 09:36:13 'trace.json' exists in many files, consider making

gabadie 2016/04/19 17:39:48 Nop. trace.json is sandwich specific. I don't see

+ if not os.path.isfile(trace_path):

+ logging.error('missing trace %s' % trace_path)

+ continue

+ trace = LoadingTrace.FromJsonFile(trace_path)

+ logging.info('verifying %s from %s' % (trace.url, trace_path))

+ CompareUrlSet(url_resources, _ListUrlRequests(trace), 'All resources',

pasko 2016/04/18 09:36:13 explicit second arg for _ListUrlRequests is needed

gabadie 2016/04/19 17:39:48 Done.

+ 'You may have an issue with an AJAX requests.')

pasko 2016/04/18 09:36:13 I think one can easily tell this by looking at URL

gabadie 2016/04/19 17:39:48 Done.

+ CompareUrlSet(url_resources.intersection(cache_whitelist),

+ _ListUrlRequests(trace, True), 'Cached resources',

+ 'The WPR archive patcher may have an invalidation issue.')

pasko 2016/04/18 09:36:13 It's slightly funny to see gabadie@ providing hint

gabadie 2016/04/19 17:39:48 The point is to give a hint to users who wanted to r

+ CompareUrlSet(url_resources.difference(cache_whitelist),

+ _ListUrlRequests(trace, False), 'Non cached resources')

+def ListResourcesUrls(benchmark_output_directory_path):

pasko 2016/04/18 09:36:13 ListResourceUrls

gabadie 2016/04/19 17:39:48 Done.

+ """Lists all requested urls per navigated urls

pasko 2016/04/18 09:36:12 s/urls/URLs/

gabadie 2016/04/19 17:39:48 Done.

+ Args:

+ benchmark_output_directory_path: Path of the benchmark output directory to

+ verify.

+ Returns:

+ {url -> [urls of sub-resources]}

+ """

+ url_subresources = {}

+ run_id = -1

+ while True:

+ run_id += 1

+ run_path = os.path.join(benchmark_output_directory_path, str(run_id))

+ if not os.path.isdir(run_path):

+ break

+ trace_path = os.path.join(run_path, 'trace.json')

+ if not os.path.isfile(trace_path):

+ continue

+ trace = LoadingTrace.FromJsonFile(trace_path)

+ if trace.url in url_subresources:

+ continue

+ logging.info('lists resources of %s from %s' % (trace.url, trace_path))

+ urls_set = set()

+ for request_event in trace.request_track.GetEvents():

+ if not request_event.protocol.startswith('http'):

+ continue

+ if request_event.url not in urls_set:

+ logging.info(' %s' % request_event.url)

+ urls_set.add(request_event.url)

+ url_subresources[trace.url] = [url for url in urls_set]

+ return url_subresources

+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):

+ """Validates a cache archive content.

+ Args:

+ ref_urls: Reference list of urls.

+ cache_archive_path: Cache archive's path to validate.

+ """

+ logging.info('lists cached urls from %s' % cache_archive_path)

+ with common_util.TemporaryDirectory() as cache_directory:

+ chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)

+ cached_urls = \

+ chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()

+ CompareUrlSet(set(ref_urls), set(cached_urls), 'cached resources',

+ debug_hint='Looks like a response header needs to be patched.')