Chromium Code Reviews

Diff: tools/android/loading/sandwich_misc.py

Issue 1872313002: sandwich: Implement SandwichTaskBuilder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addresses pasko's comments and adds support for the different sub-resource discoverers. Created 4 years, 8 months ago
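The "sub-resource discoverers" mentioned in the patch-set description are plain string constants declared at the top of the new file and validated against SUBRESOURCE_DISCOVERERS before the prefetch whitelist is built. A purely illustrative check (the caller shown here is hypothetical; the constant value and the assert mirror the diff below):

from sandwich_misc import SUBRESOURCE_DISCOVERERS

# Hypothetical caller: pick a discoverer by name and validate it the same way
# the patched module does.
subresource_discoverer = 'html-scanner'  # i.e. HTML_PRELOAD_SCANNER_DISCOVERER
assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
    'unknown prefetch simulation {}'.format(subresource_discoverer)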
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import logging
+import json
+import os

+import chrome_cache
+import common_util
 from loading_trace import LoadingTrace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
 import wpr_backend


+# Don't prefetch anything
pasko 2016/04/14 12:34:42 # Do not prefetch anything. (i.e. a full stop at
gabadie 2016/04/14 15:43:32 Done.
+DISABLED_DISCOVERER = 'disabled'
pasko 2016/04/14 12:34:42 is this used as part of file names? if so, seeing
gabadie 2016/04/14 15:43:32 Done.
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULLCACHE_DISCOVERER = 'fullcache'
pasko 2016/04/14 12:34:42 nit: prefer words separated by dashes in file name
gabadie 2016/04/14 15:43:32 Done.
+
 # Prefetches the first resource following the redirection chain.
 REDIRECTED_MAIN_DISCOVERER = 'redirected-main'

 # All resources which are fetched from the main document and their redirections.
-PARSER_DISCOVERER = 'parser',
+PARSER_DISCOVERER = 'parser'

 # Simulation of HTMLPreloadScanner on the main document and their redirections.
-HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner',
+HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'

 SUBRESOURCE_DISCOVERERS = set([
+    DISABLED_DISCOVERER,
+    FULLCACHE_DISCOVERER,
     REDIRECTED_MAIN_DISCOVERER,
     PARSER_DISCOVERER,
     HTML_PRELOAD_SCANNER_DISCOVERER
 ])


 def PatchWpr(wpr_archive_path):
   """Patches a WPR archive to get all resources into the HTTP cache and avoid
   invalidation and revalidations.

(...skipping 44 matching lines...)
       'unknown prefetch simulation {}'.format(subresource_discoverer)

   # Load trace and related infos.
   logging.info('loading %s' % loading_trace_path)
   trace = LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)
   first_resource_request = trace.request_track.GetFirstResourceRequest()

   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
-  if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+  if subresource_discoverer == DISABLED_DISCOVERER:
+    pass
+  elif subresource_discoverer == FULLCACHE_DISCOVERER:
+    discovered_requests = trace.request_track.GetEvents()
+  elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
     discovered_requests = \
         [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == PARSER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
         first_resource_request, dependencies_lens)
   elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.PreloadedRequests(
         first_resource_request, dependencies_lens, trace)
   else:
     assert False

   # Prune out data:// requests.
   whitelisted_urls = set()
   logging.info('white-listing %s' % first_resource_request.url)
-  whitelisted_urls.add(first_resource_request.url)
   for request in discovered_requests:
     # Work-around where the protocol may be none for an unclear reason yet.
     # TODO(gabadie): Follow up on this with Clovis guys and possibly remove
     # this work-around.
     if not request.protocol:
       logging.warning('ignoring %s (no protocol)' % request.url)
       continue
     # Ignore data protocols.
     if not request.protocol.startswith('http'):
       continue
     logging.info('white-listing %s' % request.url)
     whitelisted_urls.add(request.url)
   return whitelisted_urls
+
+
+def CompareUrlSet(ref_url_set, url_set, url_set_name, debug_hint='Good luck!'):
+  """Compares two URL sets.
+
+  Args:
+    ref_url_set: Set of reference urls.
+    url_set: Set of urls to compare to the reference.
+    url_set_name: The set name for logging purposes.
+    debug_hint: A hint logged to help debugging when the sets differ.
+  """
+  assert type(ref_url_set) == set
+  assert type(url_set) == set
+  if ref_url_set == url_set:
+    logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+    return
+  logging.error(' %s are not matching.' % url_set_name)
+  logging.error(' Hint: ' + debug_hint)
+  logging.error(' List of missing resources:')
+  for url in ref_url_set.difference(url_set):
+    logging.error('- ' + url)
+  logging.error(' List of unexpected resources:')
+  for url in url_set.difference(ref_url_set):
+    logging.error('+ ' + url)
+
+
+def _ListUrlRequests(trace, from_cache=None):
+  urls = set()
+  for request_event in trace.request_track.GetEvents():
+    if request_event.protocol is None:
+      continue
+    if not request_event.protocol.startswith('http'):
+      continue
+    if from_cache is not None and request_event.from_disk_cache != from_cache:
+      continue
+    urls.add(request_event.url)
+  return urls
+
+
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+                                   benchmark_output_directory_path):
+  """Verifies that all runs inside the run_output_directory worked as expected.
+
+  Args:
+    benchmark_setup_path: Path of the JSON of the benchmark setup.
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+  """
+  benchmark_setup = json.load(open(benchmark_setup_path))
+  cache_whitelist = set(benchmark_setup['cache_whitelist'])
+  url_resources = set(benchmark_setup['url_resources'])
+
+  # Verify requests from traces.
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, 'trace.json')
+    if not os.path.isfile(trace_path):
+      logging.error('missing trace %s' % trace_path)
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    CompareUrlSet(url_resources, _ListUrlRequests(trace), 'All resources',
+                  'You may have an issue with AJAX requests.')
+    CompareUrlSet(url_resources.intersection(cache_whitelist),
+                  _ListUrlRequests(trace, True), 'Cached resources',
+                  'The WPR archive patcher may have an invalidation issue.')
+    CompareUrlSet(url_resources.difference(cache_whitelist),
+                  _ListUrlRequests(trace, False), 'Non cached resources')
+
+
+def ListResourcesUrls(benchmark_output_directory_path):
+  """Lists all requested URLs per navigated URL.
+
+  Args:
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+
+  Returns:
+    {url -> [urls of sub-resources]}
+  """
+  url_subresources = {}
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, 'trace.json')
+    if not os.path.isfile(trace_path):
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    if trace.url in url_subresources:
+      continue
+    logging.info('lists resources of %s from %s' % (trace.url, trace_path))
+    urls_set = set()
+    for request_event in trace.request_track.GetEvents():
+      if not request_event.protocol.startswith('http'):
+        continue
+      if request_event.url not in urls_set:
+        logging.info(' %s' % request_event.url)
+        urls_set.add(request_event.url)
+    url_subresources[trace.url] = [url for url in urls_set]
+  return url_subresources
+
+
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
+  """Validates a cache archive content.
+
+  Args:
+    ref_urls: Reference list of urls.
+    cache_archive_path: Cache archive's path to validate.
+  """
+  logging.info('lists cached urls from %s' % cache_archive_path)
+  with common_util.TemporaryDirectory() as cache_directory:
+    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
+    cached_urls = \
+        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
+    CompareUrlSet(set(ref_urls), set(cached_urls), 'cached resources',
+        debug_hint='Looks like a response header needs to be patched.')
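
The verification helpers added at the bottom of this file (VerifyBenchmarkOutputDirectory, ListResourcesUrls and ValidateCacheArchiveContent) are meant to be driven by the sandwich task builder elsewhere in this CL. As a rough illustration only, they might be chained together as sketched below; the paths and the benchmark-setup JSON layout are assumptions inferred from the code above, not part of the patch:

import json
import logging

import sandwich_misc

logging.basicConfig(level=logging.INFO)

# Hypothetical output locations; the real ones come from the benchmark runner.
benchmark_setup_path = 'out/sandwich/benchmark_setup.json'
benchmark_output_directory_path = 'out/sandwich/runs'
cache_archive_path = 'out/sandwich/cache.zip'

# Compare every per-run trace.json against the expected cached and non-cached
# resources recorded in the benchmark setup.
sandwich_misc.VerifyBenchmarkOutputDirectory(
    benchmark_setup_path, benchmark_output_directory_path)

# Check that the cache archive contains exactly the whitelisted URLs.
benchmark_setup = json.load(open(benchmark_setup_path))
sandwich_misc.ValidateCacheArchiveContent(
    benchmark_setup['cache_whitelist'], cache_archive_path)

ListResourcesUrls walks the same numbered run directories, so it could be pointed at the same output directory to rebuild the per-URL sub-resource lists.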