Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import json | |
| 7 import os | |
| 6 | 8 |
| 9 import chrome_cache | |
| 10 import common_util | |
| 7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
| 8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
| 9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
| 10 from user_satisfied_lens import FirstContentfulPaintLens | 14 import sandwich_runner |
| 11 import wpr_backend | 15 import wpr_backend |
| 12 | 16 |
| 13 | 17 |
| 18 # Do not prefetch anything. | |
| 19 EMPTY_CACHE_DISCOVERER = 'empty-cache' | |
| 20 | |
| 21 # Prefetches everything to load fully from cache (impossible in practice). | |
| 22 FULL_CACHE_DISCOVERER = 'full-cache' | |
| 23 | |
| 14 # Prefetches the first resource following the redirection chain. | 24 # Prefetches the first resource following the redirection chain. |
| 15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 25 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| 16 | 26 |
| 17 # All resources which are fetched from the main document and their redirections. | 27 # All resources which are fetched from the main document and their redirections. |
| 18 PARSER_DISCOVERER = 'parser' | 28 PARSER_DISCOVERER = 'parser' |
| 19 | 29 |
| 20 # Simulation of HTMLPreloadScanner on the main document and their redirections. | 30 # Simulation of HTMLPreloadScanner on the main document and their redirections. |
| 21 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' | 31 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
| 22 | 32 |
| 23 SUBRESOURCE_DISCOVERERS = set([ | 33 SUBRESOURCE_DISCOVERERS = set([ |
| 34 EMPTY_CACHE_DISCOVERER, | |
| 35 FULL_CACHE_DISCOVERER, | |
| 24 REDIRECTED_MAIN_DISCOVERER, | 36 REDIRECTED_MAIN_DISCOVERER, |
| 25 PARSER_DISCOVERER, | 37 PARSER_DISCOVERER, |
| 26 HTML_PRELOAD_SCANNER_DISCOVERER | 38 HTML_PRELOAD_SCANNER_DISCOVERER |
| 27 ]) | 39 ]) |
| 28 | 40 |
| 29 | 41 |
| 30 def PatchWpr(wpr_archive_path): | 42 def PatchWpr(wpr_archive_path): |
| 31 """Patches a WPR archive to get all resources into the HTTP cache and avoid | 43 """Patches a WPR archive to get all resources into the HTTP cache and avoid |
| 32 invalidation and revalidations. | 44 invalidation and revalidations. |
| 33 | 45 |
| (...skipping 44 matching lines...) | |
| 78 'unknown prefetch simulation {}'.format(subresource_discoverer) | 90 'unknown prefetch simulation {}'.format(subresource_discoverer) |
| 79 | 91 |
| 80 # Load trace and related infos. | 92 # Load trace and related infos. |
| 81 logging.info('loading %s' % loading_trace_path) | 93 logging.info('loading %s' % loading_trace_path) |
| 82 trace = LoadingTrace.FromJsonFile(loading_trace_path) | 94 trace = LoadingTrace.FromJsonFile(loading_trace_path) |
| 83 dependencies_lens = RequestDependencyLens(trace) | 95 dependencies_lens = RequestDependencyLens(trace) |
| 84 first_resource_request = trace.request_track.GetFirstResourceRequest() | 96 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 85 | 97 |
| 86 # Build the list of discovered requests according to the desired simulation. | 98 # Build the list of discovered requests according to the desired simulation. |
| 87 discovered_requests = [] | 99 discovered_requests = [] |
| 88 if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | 100 if subresource_discoverer == EMPTY_CACHE_DISCOVERER: |
| 101 pass | |
| 102 elif subresource_discoverer == FULL_CACHE_DISCOVERER: | |
| 103 discovered_requests = trace.request_track.GetEvents() | |
| 104 elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | |
| 89 discovered_requests = \ | 105 discovered_requests = \ |
| 90 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] | 106 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| 91 elif subresource_discoverer == PARSER_DISCOVERER: | 107 elif subresource_discoverer == PARSER_DISCOVERER: |
| 92 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 108 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 93 first_resource_request, dependencies_lens) | 109 first_resource_request, dependencies_lens) |
| 94 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 110 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 95 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 111 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 96 first_resource_request, dependencies_lens, trace) | 112 first_resource_request, dependencies_lens, trace) |
| 97 else: | 113 else: |
| 98 assert False | 114 assert False |
| 99 | 115 |
| 100 # Prune out data:// requests. | 116 # Prune out data:// requests. |
| 101 whitelisted_urls = set() | 117 whitelisted_urls = set() |
| 102 logging.info('white-listing %s' % first_resource_request.url) | 118 logging.info('white-listing %s' % first_resource_request.url) |
| 103 whitelisted_urls.add(first_resource_request.url) | |
| 104 for request in discovered_requests: | 119 for request in discovered_requests: |
| 105 # Work-around where the protocol may be none for an unclear reason yet. | 120 # Work-around where the protocol may be none for an unclear reason yet. |
| 106 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove | 121 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove |
| 107 # this work-around. | 122 # this work-around. |
| 108 if not request.protocol: | 123 if not request.protocol: |
| 109 logging.warning('ignoring %s (no protocol)' % request.url) | 124 logging.warning('ignoring %s (no protocol)' % request.url) |
| 110 continue | 125 continue |
| 111 # Ignore data protocols. | 126 # Ignore data protocols. |
| 112 if not request.protocol.startswith('http'): | 127 if not request.protocol.startswith('http'): |
| 113 continue | 128 continue |
| 114 logging.info('white-listing %s' % request.url) | 129 logging.info('white-listing %s' % request.url) |
| 115 whitelisted_urls.add(request.url) | 130 whitelisted_urls.add(request.url) |
| 116 return whitelisted_urls | 131 return whitelisted_urls |
| 132 | |
| 133 | |
| 134 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | |
| 135 """Compare URL sets and log the diffs. | |
| 136 | |
| 137 Args: | |
| 138 ref_url_set: Set of reference urls. | |
| 139 url_set: Set of urls to compare to the reference. | |
| 140 url_set_name: The set name for logging purposes. | |
| 141 """ | |
| 142 assert type(ref_url_set) == set | |
| 143 assert type(url_set) == set | |
| 144 if ref_url_set == url_set: | |
| 145 logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name)) | |
| 146 return | |
| 147 logging.error(' %s are not matching.' % url_set_name) | |
| 148 logging.error(' List of missing resources:') | |
| 149 for url in ref_url_set.difference(url_set): | |
| 150 logging.error('- ' + url) | |
| 151 logging.error(' List of unexpected resources:') | |
| 152 for url in url_set.difference(ref_url_set): | |
| 153 logging.error('+ ' + url) | |
| 154 | |
| 155 | |
| 156 class _RequestOutcome: | |
| 157 All, ServedFromCache, NotServedFromCache = range(3) | |
| 158 | |
| 159 | |
| 160 def _ListUrlRequests(trace, request_kind): | |
| 161 """Lists requested URLs from a trace. | |
| 162 | |
| 163 Args: | |
| 164 trace: The trace. | |
| pasko (2016/04/25 13:29:06): trace: (LoadingTrace) loading trace. | |
| gabadie (2016/04/27 08:32:16): Done. | |
| 165 request_kind: _RequestOutcome indicating the subset of requests to output. | |
| 166 | |
| 167 Returns: | |
| 168 set([str]) | |
| 169 """ | |
| 170 urls = set() | |
| 171 for request_event in trace.request_track.GetEvents(): | |
| 172 if request_event.protocol == None: | |
| 173 continue | |
| 174 if request_event.protocol.startswith('data'): | |
| 175 continue | |
| 176 assert request_event.protocol.startswith('http') | |
| pasko (2016/04/25 13:29:06): I can remember you preferring to avoid asserts on... | |
| gabadie (2016/04/27 08:32:16): Yes but here it is not an user input related check... | |
| pasko (2016/04/27 08:50:58): I disagree, it depends on what a user puts into th... | |
| 177 if (request_kind == _RequestOutcome.ServedFromCache and | |
| 178 request_event.from_disk_cache): | |
| 179 urls.add(request_event.url) | |
| 180 elif (request_kind == _RequestOutcome.NotServedFromCache and | |
| 181 not request_event.from_disk_cache): | |
| 182 urls.add(request_event.url) | |
| 183 elif request_kind == _RequestOutcome.All: | |
| 184 urls.add(request_event.url) | |
| 185 return urls | |
| 186 | |
| 187 | |
| 188 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, | |
| 189 benchmark_output_directory_path): | |
| 190 """Verifies that all run inside the run_output_directory worked as expected. | |
| 191 | |
| 192 Args: | |
| 193 benchmark_setup_path: Path of the JSON of the benchmark setup. | |
| 194 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 195 verify. | |
| 196 """ | |
| 197 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
| 198 benchmark_setup = json.load(open(benchmark_setup_path)) | |
| 199 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
| 200 url_resources = set(benchmark_setup['url_resources']) | |
| 201 | |
| 202 # Verify requests from traces. | |
| 203 run_id = -1 | |
| 204 while True: | |
| 205 run_id += 1 | |
| 206 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
| 207 if not os.path.isdir(run_path): | |
| 208 break | |
| 209 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | |
| 210 if not os.path.isfile(trace_path): | |
| 211 logging.error('missing trace %s' % trace_path) | |
| 212 continue | |
| 213 trace = LoadingTrace.FromJsonFile(trace_path) | |
| 214 logging.info('verifying %s from %s' % (trace.url, trace_path)) | |
| 215 _PrintUrlSetComparison(url_resources, | |
| 216 _ListUrlRequests(trace, _RequestOutcome.All), 'All resources') | |
| 217 _PrintUrlSetComparison(url_resources.intersection(cache_whitelist), | |
| 218 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache), | |
| 219 'Cached resources') | |
| 220 _PrintUrlSetComparison(url_resources.difference(cache_whitelist), | |
| 221 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache), | |
| 222 'Non cached resources') | |
| 223 | |
| 224 | |
| 225 def ReadSubresourceMapFromBenchmarkOutput(benchmark_output_directory_path): | |
| 226 """Extracts a map URL-to-subresources for each navigation in benchmark | |
| 227 directory. | |
| 228 | |
| 229 Args: | |
| 230 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 231 verify. | |
| 232 | |
| 233 Returns: | |
| 234 {url -> [URLs of sub-resources]} | |
| 235 """ | |
| 236 url_subresources = {} | |
| 237 run_id = -1 | |
| 238 while True: | |
| 239 run_id += 1 | |
| 240 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
| 241 if not os.path.isdir(run_path): | |
| 242 break | |
| 243 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | |
| 244 if not os.path.isfile(trace_path): | |
| 245 continue | |
| 246 trace = LoadingTrace.FromJsonFile(trace_path) | |
| 247 if trace.url in url_subresources: | |
| 248 continue | |
| 249 logging.info('lists resources of %s from %s' % (trace.url, trace_path)) | |
| 250 urls_set = set() | |
| 251 for request_event in trace.request_track.GetEvents(): | |
| 252 if not request_event.protocol.startswith('http'): | |
| 253 continue | |
| 254 if request_event.url not in urls_set: | |
| 255 logging.info(' %s' % request_event.url) | |
| 256 urls_set.add(request_event.url) | |
| 257 url_subresources[trace.url] = [url for url in urls_set] | |
| 258 return url_subresources | |
| 259 | |
| 260 | |
| 261 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): | |
| 262 """Validates a cache archive content. | |
| 263 | |
| 264 Args: | |
| 265 ref_urls: Reference list of urls. | |
| 266 cache_archive_path: Cache archive's path to validate. | |
| 267 """ | |
| 268 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
| 269 logging.info('lists cached urls from %s' % cache_archive_path) | |
| 270 with common_util.TemporaryDirectory() as cache_directory: | |
| 271 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | |
| 272 cached_urls = \ | |
| 273 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() | |
| 274 _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') | |
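
Since the TODOs above note that mismatches are only surfaced through logging, a minimal driver for the new verification helpers might look like the sketch below. It is only an illustration: the import name `sandwich_misc` and the three paths are assumptions not confirmed by this diff, and the setup JSON is expected to carry the `cache_whitelist` and `url_resources` keys that `VerifyBenchmarkOutputDirectory` reads.

```python
# Usage sketch for the verification helpers reviewed above. Everything here
# is illustrative: the module name `sandwich_misc` and the three paths are
# assumptions, not taken from this CL.
import json
import logging

import sandwich_misc  # assumed import name for the file under review

BENCHMARK_SETUP_PATH = 'benchmark_setup.json'  # hypothetical path
BENCHMARK_OUTPUT_DIR = 'benchmark_output'      # hypothetical path
CACHE_ARCHIVE_PATH = 'cache.zip'               # hypothetical path

logging.basicConfig(level=logging.INFO)

# Compares the requests recorded in each run's trace against the expected
# resource and whitelist sets; mismatches are reported via logging.error.
sandwich_misc.VerifyBenchmarkOutputDirectory(
    BENCHMARK_SETUP_PATH, BENCHMARK_OUTPUT_DIR)

# Cross-checks the cache archive against the whitelist stored in the
# benchmark setup JSON (the 'cache_whitelist' key read above).
with open(BENCHMARK_SETUP_PATH) as setup_file:
  benchmark_setup = json.load(setup_file)
sandwich_misc.ValidateCacheArchiveContent(
    benchmark_setup['cache_whitelist'], CACHE_ARCHIVE_PATH)

# Lists the sub-resources discovered for each navigated URL.
subresources = sandwich_misc.ReadSubresourceMapFromBenchmarkOutput(
    BENCHMARK_OUTPUT_DIR)
for url, resources in sorted(subresources.items()):
  logging.info('%s -> %d sub-resources', url, len(resources))
```

As the TODO(gabadie) comments point out, these helpers report set differences through `logging.error` rather than raising, so a caller that needs a pass/fail signal would have to inspect the logs or extend the functions to return a status.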