| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import json |
| 7 import os |
| 6 | 8 |
| 9 import chrome_cache |
| 10 import common_util |
| 7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
| 8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
| 9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
| 10 from user_satisfied_lens import FirstContentfulPaintLens | 14 import sandwich_runner |
| 11 import wpr_backend | 15 import wpr_backend |
| 12 | 16 |
| 13 | 17 |
| 18 # Do not prefetch anything. |
| 19 EMPTY_CACHE_DISCOVERER = 'empty-cache' |
| 20 |
| 21 # Prefetches everything to load fully from cache (impossible in practice). |
| 22 FULL_CACHE_DISCOVERER = 'full-cache' |
| 23 |
| 14 # Prefetches the first resource following the redirection chain. | 24 # Prefetches the first resource following the redirection chain. |
| 15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 25 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| 16 | 26 |
| 17 # All resources fetched from the main document, plus their redirections. | 27 # All resources fetched from the main document, plus their redirections. |
| 18 PARSER_DISCOVERER = 'parser' | 28 PARSER_DISCOVERER = 'parser' |
| 19 | 29 |
| 21 # Simulation of the HTMLPreloadScanner on the main document and its redirections. | 31 # Simulation of the HTMLPreloadScanner on the main document and its redirections. |
| 21 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' | 31 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
| 22 | 32 |
| 23 SUBRESOURCE_DISCOVERERS = set([ | 33 SUBRESOURCE_DISCOVERERS = set([ |
| 34 EMPTY_CACHE_DISCOVERER, |
| 35 FULL_CACHE_DISCOVERER, |
| 24 REDIRECTED_MAIN_DISCOVERER, | 36 REDIRECTED_MAIN_DISCOVERER, |
| 25 PARSER_DISCOVERER, | 37 PARSER_DISCOVERER, |
| 26 HTML_PRELOAD_SCANNER_DISCOVERER | 38 HTML_PRELOAD_SCANNER_DISCOVERER |
| 27 ]) | 39 ]) |
| 28 | 40 |
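Each discoverer name selects one prefetch simulation. As a minimal sketch of how
a caller might sweep every simulation, assuming a hypothetical ExtractWhitelist
wrapper around the extraction function whose body appears further down:

    # Hypothetical driver: run every prefetch simulation against one trace.
    # ExtractWhitelist is an assumed name, not part of this CL.
    for discoverer in sorted(SUBRESOURCE_DISCOVERERS):
      whitelist = ExtractWhitelist('loading_trace.json', discoverer)
      print('%s: %d white-listed URLs' % (discoverer, len(whitelist)))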
| 29 | 41 |
| 30 def PatchWpr(wpr_archive_path): | 42 def PatchWpr(wpr_archive_path): |
| 31 """Patches a WPR archive to get all resources into the HTTP cache and avoid | 43 """Patches a WPR archive to get all resources into the HTTP cache and avoid |
| 32 cache invalidations and revalidations. | 44 cache invalidations and revalidations. |
| 33 | 45 |
| (...skipping 44 matching lines...) |
| 78 'unknown prefetch simulation {}'.format(subresource_discoverer) | 90 'unknown prefetch simulation {}'.format(subresource_discoverer) |
| 79 | 91 |
| 80 # Load trace and related infos. | 92 # Load trace and related infos. |
| 81 logging.info('loading %s' % loading_trace_path) | 93 logging.info('loading %s' % loading_trace_path) |
| 82 trace = LoadingTrace.FromJsonFile(loading_trace_path) | 94 trace = LoadingTrace.FromJsonFile(loading_trace_path) |
| 83 dependencies_lens = RequestDependencyLens(trace) | 95 dependencies_lens = RequestDependencyLens(trace) |
| 84 first_resource_request = trace.request_track.GetFirstResourceRequest() | 96 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 85 | 97 |
| 86 # Build the list of discovered requests according to the desired simulation. | 98 # Build the list of discovered requests according to the desired simulation. |
| 87 discovered_requests = [] | 99 discovered_requests = [] |
| 88 if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | 100 if subresource_discoverer == EMPTY_CACHE_DISCOVERER: |
| 101 pass |
| 102 elif subresource_discoverer == FULL_CACHE_DISCOVERER: |
| 103 discovered_requests = trace.request_track.GetEvents() |
| 104 elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: |
| 89 discovered_requests = \ | 105 discovered_requests = \ |
| 90 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] | 106 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| 91 elif subresource_discoverer == PARSER_DISCOVERER: | 107 elif subresource_discoverer == PARSER_DISCOVERER: |
| 92 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 108 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 93 first_resource_request, dependencies_lens) | 109 first_resource_request, dependencies_lens) |
| 94 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 110 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 95 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 111 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 96 first_resource_request, dependencies_lens, trace) | 112 first_resource_request, dependencies_lens, trace) |
| 97 else: | 113 else: |
| 98 assert False | 114 assert False |
| 99 | 115 |
| 100 # Prune out data:// requests. | 116 # Prune out data:// requests. |
| 101 whitelisted_urls = set() | 117 whitelisted_urls = set() |
| 102 logging.info('white-listing %s' % first_resource_request.url) | 118 logging.info('white-listing %s' % first_resource_request.url) |
| 103 whitelisted_urls.add(first_resource_request.url) | |
| 104 for request in discovered_requests: | 119 for request in discovered_requests: |
| 105 # Work-around for requests whose protocol is None for a still-unclear reason. | 120 # Work-around for requests whose protocol is None for a still-unclear reason. |
| 106 # TODO(gabadie): Follow up on this with the Clovis team and possibly remove | 121 # TODO(gabadie): Follow up on this with the Clovis team and possibly remove |
| 107 # this work-around. | 122 # this work-around. |
| 108 if not request.protocol: | 123 if not request.protocol: |
| 109 logging.warning('ignoring %s (no protocol)' % request.url) | 124 logging.warning('ignoring %s (no protocol)' % request.url) |
| 110 continue | 125 continue |
| 111 # Ignore data protocols. | 126 # Ignore data protocols. |
| 112 if not request.protocol.startswith('http'): | 127 if not request.protocol.startswith('http'): |
| 113 continue | 128 continue |
| 114 logging.info('white-listing %s' % request.url) | 129 logging.info('white-listing %s' % request.url) |
| 115 whitelisted_urls.add(request.url) | 130 whitelisted_urls.add(request.url) |
| 116 return whitelisted_urls | 131 return whitelisted_urls |
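The pruning loop above keeps only requests carrying an http(s) protocol. A
self-contained sketch of the same filter, using a request type invented purely
for illustration:

    import collections

    # FakeRequest mimics the two attributes the filter reads.
    FakeRequest = collections.namedtuple('FakeRequest', ['url', 'protocol'])
    requests = [FakeRequest('http://a.com/', 'http/1.1'),
                FakeRequest('data:text/html,hello', 'data'),
                FakeRequest('http://b.com/', None)]
    kept = set(r.url for r in requests
               if r.protocol and r.protocol.startswith('http'))
    assert kept == set(['http://a.com/'])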
| 132 |
| 133 |
| 134 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
| 135 """Compare URL sets and log the diffs. |
| 136 |
| 137 Args: |
| 138 ref_url_set: Set of reference urls. |
| 139 url_set: Set of urls to compare to the reference. |
| 140 url_set_name: The set name for logging purposes. |
| 141 """ |
| 142 assert isinstance(ref_url_set, set) |
| 143 assert isinstance(url_set, set) |
| 144 if ref_url_set == url_set: |
| 145 logging.info(' %d %s match.' % (len(ref_url_set), url_set_name)) |
| 146 return |
| 147 logging.error(' %s do not match.' % url_set_name) |
| 148 logging.error(' List of missing resources:') |
| 149 for url in ref_url_set.difference(url_set): |
| 150 logging.error('- ' + url) |
| 151 logging.error(' List of unexpected resources:') |
| 152 for url in url_set.difference(ref_url_set): |
| 153 logging.error('+ ' + url) |
| 154 |
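_PrintUrlSetComparison only logs; it never raises, so one mismatching run does
not abort the whole verification. A small illustration with made-up URLs:

    ref = set(['http://e.com/', 'http://e.com/app.js'])
    # Logs a mismatch for 'All resources', then lists http://e.com/app.js
    # under the missing resources with a '-' prefix.
    _PrintUrlSetComparison(ref, set(['http://e.com/']), 'All resources')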
| 155 |
| 156 class _RequestOutcome: |
| 157 All, ServedFromCache, NotServedFromCache = range(3) |
| 158 |
| 159 |
| 160 def _ListUrlRequests(trace, request_kind): |
| 161 """Lists requested URLs from a trace. |
| 162 |
| 163 Args: |
| 164 trace: (LoadingTrace) loading trace. |
| 165 request_kind: _RequestOutcome indicating the subset of requests to output. |
| 166 |
| 167 Returns: |
| 168 set([str]) |
| 169 """ |
| 170 urls = set() |
| 171 for request_event in trace.request_track.GetEvents(): |
| 172 if request_event.protocol is None: |
| 173 continue |
| 174 if request_event.protocol.startswith('data'): |
| 175 continue |
| 176 if not request_event.protocol.startswith('http'): |
| 177 raise RuntimeError('Unknown protocol {}'.format(request_event.protocol)) |
| 178 if (request_kind == _RequestOutcome.ServedFromCache and |
| 179 request_event.from_disk_cache): |
| 180 urls.add(request_event.url) |
| 181 elif (request_kind == _RequestOutcome.NotServedFromCache and |
| 182 not request_event.from_disk_cache): |
| 183 urls.add(request_event.url) |
| 184 elif request_kind == _RequestOutcome.All: |
| 185 urls.add(request_event.url) |
| 186 return urls |
| 187 |
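Every non-data request is either served from the disk cache or not, so the two
filtered subsets always cover the full set. A usage sketch (the trace path is
illustrative; the real file name comes from sandwich_runner.TRACE_FILENAME):

    trace = LoadingTrace.FromJsonFile('0/trace.json')  # illustrative path
    all_urls = _ListUrlRequests(trace, _RequestOutcome.All)
    cached = _ListUrlRequests(trace, _RequestOutcome.ServedFromCache)
    uncached = _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache)
    assert cached.union(uncached) == all_urls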
| 188 |
| 189 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, |
| 190 benchmark_output_directory_path): |
| 191 """Verifies that all run inside the run_output_directory worked as expected. |
| 192 |
| 193 Args: |
| 194 benchmark_setup_path: Path of the JSON of the benchmark setup. |
| 195 benchmark_output_directory_path: Path of the benchmark output directory to |
| 196 verify. |
| 197 """ |
| 198 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 199 benchmark_setup = json.load(open(benchmark_setup_path)) |
| 200 cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| 201 url_resources = set(benchmark_setup['url_resources']) |
| 202 |
| 203 # Verify requests from traces. |
| 204 run_id = -1 |
| 205 while True: |
| 206 run_id += 1 |
| 207 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| 208 if not os.path.isdir(run_path): |
| 209 break |
| 210 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) |
| 211 if not os.path.isfile(trace_path): |
| 212 logging.error('missing trace %s' % trace_path) |
| 213 continue |
| 214 trace = LoadingTrace.FromJsonFile(trace_path) |
| 215 logging.info('verifying %s from %s' % (trace.url, trace_path)) |
| 216 _PrintUrlSetComparison(url_resources, |
| 217 _ListUrlRequests(trace, _RequestOutcome.All), 'All resources') |
| 218 _PrintUrlSetComparison(url_resources.intersection(cache_whitelist), |
| 219 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache), |
| 220 'Cached resources') |
| 221 _PrintUrlSetComparison(url_resources.difference(cache_whitelist), |
| 222 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache), |
| 223 'Non cached resources') |
| 224 |
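The verifier expects run sub-directories named 0, 1, 2, ... under the output
directory, each holding a trace file, and a setup JSON carrying at least the
two keys read above. A hypothetical invocation:

    # benchmark_setup.json is assumed to look like:
    #   {"cache_whitelist": ["http://e.com/app.js"],
    #    "url_resources": ["http://e.com/", "http://e.com/app.js"]}
    VerifyBenchmarkOutputDirectory('benchmark_setup.json', 'output')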
| 225 |
| 226 def ReadSubresourceMapFromBenchmarkOutput(benchmark_output_directory_path): |
| 227 """Extracts a map URL-to-subresources for each navigation in benchmark |
| 228 directory. |
| 229 |
| 230 Args: |
| 231 benchmark_output_directory_path: Path of the benchmark output directory to |
| 232 verify. |
| 233 |
| 234 Returns: |
| 235 {url -> [URLs of sub-resources]} |
| 236 """ |
| 237 url_subresources = {} |
| 238 run_id = -1 |
| 239 while True: |
| 240 run_id += 1 |
| 241 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) |
| 242 if not os.path.isdir(run_path): |
| 243 break |
| 244 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) |
| 245 if not os.path.isfile(trace_path): |
| 246 continue |
| 247 trace = LoadingTrace.FromJsonFile(trace_path) |
| 248 if trace.url in url_subresources: |
| 249 continue |
| 250 logging.info('listing resources of %s from %s' % (trace.url, trace_path)) |
| 251 urls_set = set() |
| 252 for request_event in trace.request_track.GetEvents(): |
| 253 if not (request_event.protocol or '').startswith('http'): |
| 254 continue |
| 255 if request_event.url not in urls_set: |
| 256 logging.info(' %s' % request_event.url) |
| 257 urls_set.add(request_event.url) |
| 258 url_subresources[trace.url] = list(urls_set) |
| 259 return url_subresources |
| 260 |
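A usage sketch of the returned map ('output' is an illustrative directory path):

    subresource_map = ReadSubresourceMapFromBenchmarkOutput('output')
    for url, subresources in subresource_map.items():
      print('%s fetches %d distinct subresources' % (url, len(subresources)))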
| 261 |
| 262 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): |
| 263 """Validates a cache archive content. |
| 264 |
| 265 Args: |
| 266 ref_urls: Reference list of urls. |
| 267 cache_archive_path: Cache archive's path to validate. |
| 268 """ |
| 269 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 270 logging.info('listing cached urls from %s' % cache_archive_path) |
| 271 with common_util.TemporaryDirectory() as cache_directory: |
| 272 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 273 cached_urls = \ |
| 274 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() |
| 275 _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') |
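A hypothetical end-to-end check, reusing the subresource map from the previous
sketch to verify that a generated cache archive holds exactly the resources of
one page ('cache.zip' is an assumed path, not named anywhere in this CL):

    ValidateCacheArchiveContent(subresource_map['http://e.com/'], 'cache.zip')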