Chromium Code Reviews

Diff: tools/android/loading/sandwich_misc.py

Issue 1872313002: sandwich: Implement SandwichTaskBuilder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addresses pasko's comments and adds support for the different sub-resource discoverers. Created 4 years, 8 months ago
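The "sub-resource discoverers" mentioned in the patch-set description are plain string constants declared at the top of the new file and validated against SUBRESOURCE_DISCOVERERS before the prefetch whitelist is built. A purely illustrative check (the caller shown here is hypothetical; the constant value and the assert mirror the diff below):

from sandwich_misc import SUBRESOURCE_DISCOVERERS

# Hypothetical caller: pick a discoverer by name and validate it the same way
# the patched module does.
subresource_discoverer = 'html-scanner'  # i.e. HTML_PRELOAD_SCANNER_DISCOVERER
assert subresource_discoverer in SUBRESOURCE_DISCOVERERS, \
    'unknown prefetch simulation {}'.format(subresource_discoverer)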
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import logging
+import json
+import os

+import chrome_cache
+import common_util
 from loading_trace import LoadingTrace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
 import wpr_backend


+# Don't prefetch anything
pasko 2016/04/14 12:34:42 # Do not prefetch anything. (i.e. a full stop at
gabadie 2016/04/14 15:43:32 Done.
+DISABLED_DISCOVERER = 'disabled'
pasko 2016/04/14 12:34:42 is this used as part of file names? if so, seeing
gabadie 2016/04/14 15:43:32 Done.
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULLCACHE_DISCOVERER = 'fullcache'
pasko 2016/04/14 12:34:42 nit: prefer words separated by dashes in file name
gabadie 2016/04/14 15:43:32 Done.
+
 # Prefetches the first resource following the redirection chain.
 REDIRECTED_MAIN_DISCOVERER = 'redirected-main'

 # All resources which are fetched from the main document and their redirections.
-PARSER_DISCOVERER = 'parser',
+PARSER_DISCOVERER = 'parser'

 # Simulation of HTMLPreloadScanner on the main document and their redirections.
-HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner',
+HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'

 SUBRESOURCE_DISCOVERERS = set([
+    DISABLED_DISCOVERER,
+    FULLCACHE_DISCOVERER,
     REDIRECTED_MAIN_DISCOVERER,
     PARSER_DISCOVERER,
     HTML_PRELOAD_SCANNER_DISCOVERER
 ])


 def PatchWpr(wpr_archive_path):
   """Patches a WPR archive to get all resources into the HTTP cache and avoid
   invalidation and revalidations.

(...skipping 44 matching lines...)
       'unknown prefetch simulation {}'.format(subresource_discoverer)

   # Load trace and related infos.
   logging.info('loading %s' % loading_trace_path)
   trace = LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)
   first_resource_request = trace.request_track.GetFirstResourceRequest()

   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
-  if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+  if subresource_discoverer == DISABLED_DISCOVERER:
+    pass
+  elif subresource_discoverer == FULLCACHE_DISCOVERER:
+    discovered_requests = trace.request_track.GetEvents()
+  elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
     discovered_requests = \
         [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == PARSER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
         first_resource_request, dependencies_lens)
   elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.PreloadedRequests(
         first_resource_request, dependencies_lens, trace)
   else:
     assert False

   # Prune out data:// requests.
   whitelisted_urls = set()
   logging.info('white-listing %s' % first_resource_request.url)
-  whitelisted_urls.add(first_resource_request.url)
   for request in discovered_requests:
     # Work-around where the protocol may be none for an unclear reason yet.
     # TODO(gabadie): Follow up on this with Clovis guys and possibly remove
     # this work-around.
     if not request.protocol:
       logging.warning('ignoring %s (no protocol)' % request.url)
       continue
     # Ignore data protocols.
     if not request.protocol.startswith('http'):
       continue
     logging.info('white-listing %s' % request.url)
     whitelisted_urls.add(request.url)
   return whitelisted_urls
+
+
+def CompareUrlSet(ref_url_set, url_set, url_set_name, debug_hint='Good luck!'):
+  """Compares two URL sets.
+
+  Args:
+    ref_url_set: Set of reference urls.
+    url_set: Set of urls to compare to the reference.
+    url_set_name: The set name for logging purposes.
+    debug_hint: A hint logged to help debugging when the sets differ.
+  """
+  assert type(ref_url_set) == set
+  assert type(url_set) == set
+  if ref_url_set == url_set:
+    logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+    return
+  logging.error(' %s are not matching.' % url_set_name)
+  logging.error(' Hint: ' + debug_hint)
+  logging.error(' List of missing resources:')
+  for url in ref_url_set.difference(url_set):
+    logging.error('- ' + url)
+  logging.error(' List of unexpected resources:')
+  for url in url_set.difference(ref_url_set):
+    logging.error('+ ' + url)
+
+
+def _ListUrlRequests(trace, from_cache=None):
+  urls = set()
+  for request_event in trace.request_track.GetEvents():
+    if request_event.protocol is None:
+      continue
+    if not request_event.protocol.startswith('http'):
+      continue
+    if from_cache is not None and request_event.from_disk_cache != from_cache:
+      continue
+    urls.add(request_event.url)
+  return urls
+
+
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+                                   benchmark_output_directory_path):
+  """Verifies that all runs inside the run_output_directory worked as expected.
+
+  Args:
+    benchmark_setup_path: Path of the JSON of the benchmark setup.
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+  """
+  benchmark_setup = json.load(open(benchmark_setup_path))
+  cache_whitelist = set(benchmark_setup['cache_whitelist'])
+  url_resources = set(benchmark_setup['url_resources'])
+
+  # Verify requests from traces.
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, 'trace.json')
+    if not os.path.isfile(trace_path):
+      logging.error('missing trace %s' % trace_path)
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    CompareUrlSet(url_resources, _ListUrlRequests(trace), 'All resources',
+                  'You may have an issue with AJAX requests.')
+    CompareUrlSet(url_resources.intersection(cache_whitelist),
+                  _ListUrlRequests(trace, True), 'Cached resources',
+                  'The WPR archive patcher may have an invalidation issue.')
+    CompareUrlSet(url_resources.difference(cache_whitelist),
+                  _ListUrlRequests(trace, False), 'Non cached resources')
+
+
+def ListResourcesUrls(benchmark_output_directory_path):
+  """Lists all requested URLs per navigated URL.
+
+  Args:
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+
+  Returns:
+    {url -> [urls of sub-resources]}
+  """
+  url_subresources = {}
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, 'trace.json')
+    if not os.path.isfile(trace_path):
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    if trace.url in url_subresources:
+      continue
+    logging.info('lists resources of %s from %s' % (trace.url, trace_path))
+    urls_set = set()
+    for request_event in trace.request_track.GetEvents():
+      if not request_event.protocol.startswith('http'):
+        continue
+      if request_event.url not in urls_set:
+        logging.info(' %s' % request_event.url)
+        urls_set.add(request_event.url)
+    url_subresources[trace.url] = [url for url in urls_set]
+  return url_subresources
+
+
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
+  """Validates a cache archive content.
+
+  Args:
+    ref_urls: Reference list of urls.
+    cache_archive_path: Cache archive's path to validate.
+  """
+  logging.info('lists cached urls from %s' % cache_archive_path)
+  with common_util.TemporaryDirectory() as cache_directory:
+    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
+    cached_urls = \
+        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
+    CompareUrlSet(set(ref_urls), set(cached_urls), 'cached resources',
+        debug_hint='Looks like a response header needs to be patched.')
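
The verification helpers added at the bottom of this file (VerifyBenchmarkOutputDirectory, ListResourcesUrls and ValidateCacheArchiveContent) are meant to be driven by the sandwich task builder elsewhere in this CL. As a rough illustration only, they might be chained together as sketched below; the paths and the benchmark-setup JSON layout are assumptions inferred from the code above, not part of the patch:

import json
import logging

import sandwich_misc

logging.basicConfig(level=logging.INFO)

# Hypothetical output locations; the real ones come from the benchmark runner.
benchmark_setup_path = 'out/sandwich/benchmark_setup.json'
benchmark_output_directory_path = 'out/sandwich/runs'
cache_archive_path = 'out/sandwich/cache.zip'

# Compare every per-run trace.json against the expected cached and non-cached
# resources recorded in the benchmark setup.
sandwich_misc.VerifyBenchmarkOutputDirectory(
    benchmark_setup_path, benchmark_output_directory_path)

# Check that the cache archive contains exactly the whitelisted URLs.
benchmark_setup = json.load(open(benchmark_setup_path))
sandwich_misc.ValidateCacheArchiveContent(
    benchmark_setup['cache_whitelist'], cache_archive_path)

ListResourcesUrls walks the same numbered run directories, so it could be pointed at the same output directory to rebuild the per-URL sub-resource lists.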