Chromium Code Reviews

Index: tools/android/loading/sandwich_misc.py
diff --git a/tools/android/loading/sandwich_misc.py b/tools/android/loading/sandwich_misc.py
index e7b00c625c949a485587678e1e07ff737b6ab1f7..dd057aa65369350bb1fcca58823de3c05a702562 100644
--- a/tools/android/loading/sandwich_misc.py
+++ b/tools/android/loading/sandwich_misc.py
@@ -3,14 +3,24 @@
 # found in the LICENSE file.
 import logging
+import json
+import os
+import chrome_cache
+import common_util
 from loading_trace import LoadingTrace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
+import sandwich_runner
 import wpr_backend
+# Do not prefetch anything.
+EMPTY_CACHE_DISCOVERER = 'empty-cache'
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULL_CACHE_DISCOVERER = 'full-cache'
+
 # Prefetches the first resource following the redirection chain.
 REDIRECTED_MAIN_DISCOVERER = 'redirected-main'
@@ -21,6 +31,8 @@ PARSER_DISCOVERER = 'parser'
 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'
 SUBRESOURCE_DISCOVERERS = set([
+  EMPTY_CACHE_DISCOVERER,
+  FULL_CACHE_DISCOVERER,
   REDIRECTED_MAIN_DISCOVERER,
   PARSER_DISCOVERER,
   HTML_PRELOAD_SCANNER_DISCOVERER
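For orientation, each discoverer name selects a different prefetch simulation in ExtractDiscoverableUrls below. A caller-side guard along these lines (illustrative only, not part of this patch) keeps a misspelled name from silently falling through the elif chain:

assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
    'unknown discoverer: %s' % subresource_discoverer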
@@ -85,7 +97,11 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
-  if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+  if subresource_discoverer == EMPTY_CACHE_DISCOVERER:
+    pass
+  elif subresource_discoverer == FULL_CACHE_DISCOVERER:
+    discovered_requests = trace.request_track.GetEvents()
+  elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
     discovered_requests = \
         [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == PARSER_DISCOVERER:
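A sketch of how the two new discoverers behave when fed to ExtractDiscoverableUrls (the trace path is a placeholder). With empty-cache nothing is discovered, and since the hunk below also stops white-listing the main request unconditionally, the returned whitelist ends up empty; with full-cache every request recorded in the trace is considered discovered:

# Hypothetical usage; 'out/0/trace.json' is a placeholder path.
no_prefetch = ExtractDiscoverableUrls('out/0/trace.json', EMPTY_CACHE_DISCOVERER)
assert no_prefetch == set()

full_prefetch = ExtractDiscoverableUrls('out/0/trace.json', FULL_CACHE_DISCOVERER)
# full_prefetch now holds every http(s) URL requested in the trace.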
@@ -100,7 +116,6 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
   # Prune out data:// requests.
   whitelisted_urls = set()
   logging.info('white-listing %s' % first_resource_request.url)
-  whitelisted_urls.add(first_resource_request.url)
   for request in discovered_requests:
     # Work-around where the protocol may be none for an unclear reason yet.
     # TODO(gabadie): Follow up on this with Clovis guys and possibly remove
@@ -114,3 +129,135 @@ def ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer):
     logging.info('white-listing %s' % request.url)
     whitelisted_urls.add(request.url)
   return whitelisted_urls
+
+
+def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
+  """Compare URL sets and log the diffs.
+
+  Args:
+    ref_url_set: Set of reference urls.
+    url_set: Set of urls to compare to the reference.
+    url_set_name: The set name for logging purposes.
+  """
+  assert type(ref_url_set) == set
+  assert type(url_set) == set
+  if ref_url_set == url_set:
+    logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+    return
+  logging.error(' %s are not matching.' % url_set_name)
+  logging.error(' List of missing resources:')
+  for url in ref_url_set.difference(url_set):
+    logging.error('- ' + url)
+  logging.error(' List of unexpected resources:')
+  for url in url_set.difference(ref_url_set):
+    logging.error('+ ' + url)
+
+
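A minimal illustration of the helper above, with made-up URLs:

_PrintUrlSetComparison(
    set(['http://a.com/', 'http://a.com/x.js']),   # reference set
    set(['http://a.com/', 'http://a.com/y.css']),  # observed set
    'example resources')
# Logs at error level that 'example resources' are not matching, with
# http://a.com/x.js reported as missing ('-') and http://a.com/y.css as
# unexpected ('+').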
+def _ListUrlRequests(trace, from_cache):
+  """Lists requested URLs from a trace.
+
+  Args:
+    trace: The trace.
+    from_cache:
+      None to list all requested urls;
pasko 2016/04/21 18:21:44: these rules are hard to remember, so the reader wo
gabadie 2016/04/22 14:16:42: Done.
+      True to list all requested urls served from cache;
+      False to list all requested urls not served from cache.
+
+  Returns:
+    set([str])
+  """
+  urls = set()
+  for request_event in trace.request_track.GetEvents():
+    if request_event.protocol == None:
+      continue
+    if not request_event.protocol.startswith('http'):
pasko 2016/04/21 18:21:44: This was not mentioned in the docstring. Does this
gabadie 2016/04/22 14:16:42: That's what this is for.
+      continue
+    if from_cache is not None and request_event.from_disk_cache != from_cache:
+      continue
+    urls.add(request_event.url)
+  return urls
+
+
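The three-state from_cache argument is easiest to remember from a sketch (trace is assumed to be a LoadingTrace, and from_disk_cache is assumed to be strictly boolean on http(s) request events):

all_urls = _ListUrlRequests(trace, from_cache=None)   # every http(s) request
cached   = _ListUrlRequests(trace, from_cache=True)   # served from the disk cache
network  = _ListUrlRequests(trace, from_cache=False)  # fetched over the network
# Under the boolean assumption above, cached and network partition all_urls.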
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
pasko 2016/04/21 18:21:44: Need to apply the same action as for ValidateCache
gabadie 2016/04/22 14:16:42: Acknowledged. But I don't want to block sandwich w
+                                   benchmark_output_directory_path):
+  """Verifies that all runs inside the run_output_directory worked as expected.
+
+  Args:
+    benchmark_setup_path: Path of the JSON of the benchmark setup.
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+  """
+  benchmark_setup = json.load(open(benchmark_setup_path))
+  cache_whitelist = set(benchmark_setup['cache_whitelist'])
+  url_resources = set(benchmark_setup['url_resources'])
+
+  # Verify requests from traces.
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      logging.error('missing trace %s' % trace_path)
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    _PrintUrlSetComparison(url_resources, _ListUrlRequests(trace, None),
+                           'All resources')
+    _PrintUrlSetComparison(url_resources.intersection(cache_whitelist),
+                           _ListUrlRequests(trace, True), 'Cached resources')
+    _PrintUrlSetComparison(url_resources.difference(cache_whitelist),
+                           _ListUrlRequests(trace, False),
+                           'Non cached resources')
+
+
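The loop above implies an output directory of numbered run subdirectories 0, 1, 2, ... each holding a sandwich_runner.TRACE_FILENAME, and a setup JSON carrying at least the two keys read earlier. A hypothetical minimal setup file, written from Python (the file path and URLs are placeholders):

# Hypothetical minimal benchmark setup, matching the keys read above.
benchmark_setup = {
    'cache_whitelist': ['http://a.com/style.css'],
    'url_resources': ['http://a.com/', 'http://a.com/style.css'],
}
json.dump(benchmark_setup, open('benchmark_setup.json', 'w'))  # placeholder path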
+def ListResourceUrls(benchmark_output_directory_path):
pasko 2016/04/21 18:21:44: ReadSubresourceMapFromBenchmarkOutput(...)
gabadie 2016/04/22 14:16:42: Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06: Acknowledged.
| + """Lists all requested URLs per navigated URLs |
|
pasko 2016/04/21 18:21:44: """Extracts a map URL-to-subresources for each nav
gabadie 2016/04/22 14:16:42: Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06: I will keep asking for modifications as many times
+
+  Args:
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+
+  Returns:
+    {url -> [URLs of sub-resources]}
+  """
+  url_subresources = {}
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    if trace.url in url_subresources:
+      continue
+    logging.info('lists resources of %s from %s' % (trace.url, trace_path))
+    urls_set = set()
+    for request_event in trace.request_track.GetEvents():
+      if not request_event.protocol.startswith('http'):
+        continue
+      if request_event.url not in urls_set:
+        logging.info(' %s' % request_event.url)
+        urls_set.add(request_event.url)
+    url_subresources[trace.url] = [url for url in urls_set]
+  return url_subresources
+
+
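A hedged usage sketch of ListResourceUrls (the directory path and URLs are placeholders; the codebase is Python 2, hence iteritems):

subresources = ListResourceUrls('out/benchmark')  # placeholder path
# e.g. {'http://a.com/': ['http://a.com/', 'http://a.com/app.js']}
for navigated_url, resource_urls in subresources.iteritems():
  logging.info('%s loads %d subresources', navigated_url, len(resource_urls))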
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
pasko 2016/04/21 18:21:44: Producing log messages on error is insufficient -
gabadie 2016/04/22 14:16:42: I don't want to block sandwich workflow because of
pasko 2016/04/25 13:29:06: nit: In browser development the term XMLHttpReques
gabadie 2016/04/27 08:32:16: Acknowledged.
| + """Validates a cache archive content. |
| + |
| + Args: |
| + ref_urls: Reference list of urls. |
| + cache_archive_path: Cache archive's path to validate. |
| + """ |
| + logging.info('lists cached urls from %s' % cache_archive_path) |
| + with common_util.TemporaryDirectory() as cache_directory: |
| + chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| + cached_urls = \ |
| + chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() |
| + _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') |
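Putting it together, one plausible way to chain the helpers above (both paths and the URL are placeholders):

resource_map = ListResourceUrls('out/benchmark')        # placeholder path
ref_urls = resource_map['http://a.com/']                # placeholder URL
ValidateCacheArchiveContent(ref_urls, 'out/cache.zip')  # placeholder archive
# Matches and mismatches are reported through _PrintUrlSetComparison's logging.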