Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_misc.py

Issue 1872313002: sandwich: Implement SandwichTaskBuilder (Closed)
Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Raises RuntimeError when encountering an unknown protocol
Created: 4 years, 7 months ago
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 import logging
+import json
+import os
 
+import chrome_cache
+import common_util
 from loading_trace import LoadingTrace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
+import sandwich_runner
 import wpr_backend
 
 
+# Do not prefetch anything.
+EMPTY_CACHE_DISCOVERER = 'empty-cache'
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULL_CACHE_DISCOVERER = 'full-cache'
+
 # Prefetches the first resource following the redirection chain.
 REDIRECTED_MAIN_DISCOVERER = 'redirected-main'
 
 # All resources which are fetched from the main document and their redirections.
 PARSER_DISCOVERER = 'parser'
 
 # Simulation of HTMLPreloadScanner on the main document and its redirections.
 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'
 
 SUBRESOURCE_DISCOVERERS = set([
+  EMPTY_CACHE_DISCOVERER,
+  FULL_CACHE_DISCOVERER,
   REDIRECTED_MAIN_DISCOVERER,
   PARSER_DISCOVERER,
   HTML_PRELOAD_SCANNER_DISCOVERER
 ])
 
 
 def PatchWpr(wpr_archive_path):
   """Patches a WPR archive to get all resources into the HTTP cache and avoid
   invalidation and revalidations.
 
(...skipping 44 matching lines...)
       'unknown prefetch simulation {}'.format(subresource_discoverer)
 
   # Load the trace and related info.
   logging.info('loading %s' % loading_trace_path)
   trace = LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)
   first_resource_request = trace.request_track.GetFirstResourceRequest()
 
   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
-  if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+  if subresource_discoverer == EMPTY_CACHE_DISCOVERER:
+    pass
+  elif subresource_discoverer == FULL_CACHE_DISCOVERER:
+    discovered_requests = trace.request_track.GetEvents()
+  elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
     discovered_requests = \
         [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == PARSER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
         first_resource_request, dependencies_lens)
   elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.PreloadedRequests(
         first_resource_request, dependencies_lens, trace)
   else:
     assert False
 
   # Prune out data:// requests.
   whitelisted_urls = set()
   logging.info('white-listing %s' % first_resource_request.url)
-  whitelisted_urls.add(first_resource_request.url)
   for request in discovered_requests:
     # Work-around: the protocol may be None, for a reason that is still
     # unclear.
     # TODO(gabadie): Follow up on this with the Clovis team and possibly
     # remove this work-around.
     if not request.protocol:
       logging.warning('ignoring %s (no protocol)' % request.url)
       continue
     # Ignore data protocols.
     if not request.protocol.startswith('http'):
       continue
     logging.info('white-listing %s' % request.url)
     whitelisted_urls.add(request.url)
   return whitelisted_urls
+
+
+def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
+  """Compares URL sets and logs the diffs.
+
+  Args:
+    ref_url_set: Set of reference URLs.
+    url_set: Set of URLs to compare to the reference.
+    url_set_name: The set name, for logging purposes.
+  """
+  assert type(ref_url_set) == set
+  assert type(url_set) == set
+  if ref_url_set == url_set:
+    logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+    return
+  logging.error(' %s are not matching.' % url_set_name)
+  logging.error(' List of missing resources:')
+  for url in ref_url_set.difference(url_set):
+    logging.error('- ' + url)
+  logging.error(' List of unexpected resources:')
+  for url in url_set.difference(ref_url_set):
+    logging.error('+ ' + url)
+
+
+class _RequestOutcome:
+  All, ServedFromCache, NotServedFromCache = range(3)
+
+
+def _ListUrlRequests(trace, request_kind):
+  """Lists requested URLs from a trace.
+
+  Args:
+    trace: (LoadingTrace) Loading trace.
+    request_kind: _RequestOutcome indicating the subset of requests to output.
+
+  Returns:
+    set([str])
+  """
+  urls = set()
+  for request_event in trace.request_track.GetEvents():
+    if request_event.protocol is None:
+      continue
+    if request_event.protocol.startswith('data'):
+      continue
+    if not request_event.protocol.startswith('http'):
+      raise RuntimeError('Unknown protocol {}'.format(request_event.protocol))
+    if (request_kind == _RequestOutcome.ServedFromCache and
+        request_event.from_disk_cache):
+      urls.add(request_event.url)
+    elif (request_kind == _RequestOutcome.NotServedFromCache and
+          not request_event.from_disk_cache):
+      urls.add(request_event.url)
+    elif request_kind == _RequestOutcome.All:
+      urls.add(request_event.url)
+  return urls
+
+
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+                                   benchmark_output_directory_path):
+  """Verifies that all runs in the benchmark output directory worked as
+  expected.
+
+  Args:
+    benchmark_setup_path: Path of the benchmark setup JSON file.
+    benchmark_output_directory_path: Path of the benchmark output directory to
+      verify.
+  """
+  # TODO(gabadie): What's the best way of propagating errors happening in here?
+  with open(benchmark_setup_path) as benchmark_setup_file:
+    benchmark_setup = json.load(benchmark_setup_file)
+  cache_whitelist = set(benchmark_setup['cache_whitelist'])
+  url_resources = set(benchmark_setup['url_resources'])
+
+  # Verify requests from traces.
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      logging.error('missing trace %s' % trace_path)
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    _PrintUrlSetComparison(url_resources,
+        _ListUrlRequests(trace, _RequestOutcome.All), 'All resources')
+    _PrintUrlSetComparison(url_resources.intersection(cache_whitelist),
+        _ListUrlRequests(trace, _RequestOutcome.ServedFromCache),
+        'Cached resources')
+    _PrintUrlSetComparison(url_resources.difference(cache_whitelist),
+        _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache),
+        'Non cached resources')
+
+
+def ReadSubresourceMapFromBenchmarkOutput(benchmark_output_directory_path):
+  """Extracts a URL-to-subresources map for each navigation found in a
+  benchmark output directory.
+
+  Args:
+    benchmark_output_directory_path: Path of the benchmark output directory to
+      read from.
+
+  Returns:
+    {url -> [URLs of sub-resources]}
+  """
+  url_subresources = {}
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    if trace.url in url_subresources:
+      continue
+    logging.info('listing resources of %s from %s' % (trace.url, trace_path))
+    urls_set = set()
+    for request_event in trace.request_track.GetEvents():
+      # Guard against None protocols, as in the work-around above.
+      if (not request_event.protocol or
+          not request_event.protocol.startswith('http')):
+        continue
+      if request_event.url not in urls_set:
+        logging.info(' %s' % request_event.url)
+        urls_set.add(request_event.url)
+    url_subresources[trace.url] = list(urls_set)
+  return url_subresources
+
+
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
+  """Validates the content of a cache archive.
+
+  Args:
+    ref_urls: Reference list of URLs.
+    cache_archive_path: Path of the cache archive to validate.
+  """
+  # TODO(gabadie): What's the best way of propagating errors happening in here?
+  logging.info('listing cached URLs from %s' % cache_archive_path)
+  with common_util.TemporaryDirectory() as cache_directory:
+    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
+    cached_urls = \
+        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
+  _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources')
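
For reviewers who want to exercise the discoverers, a minimal driver sketch follows. The whitelist-extraction function's name and signature fall inside the elided region above, so ExtractDiscoverableUrls(loading_trace_path, subresource_discoverer) is an assumption inferred from the body shown; everything else comes from this file.

# Hypothetical driver: builds one whitelist per discoverer from a single trace.
# ExtractDiscoverableUrls is an assumed name for the elided function whose body
# appears above (it loads the trace and returns whitelisted_urls).
import sandwich_misc

def BuildAllWhitelists(loading_trace_path):
  whitelists = {}
  for discoverer in sorted(sandwich_misc.SUBRESOURCE_DISCOVERERS):
    whitelists[discoverer] = sandwich_misc.ExtractDiscoverableUrls(
        loading_trace_path, discoverer)
  return whitelists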
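A sketch of the directory layout that VerifyBenchmarkOutputDirectory() walks, reconstructed from its enumeration loop; the concrete paths are made up, and only the two JSON keys the function reads are shown.

# Assumed layout: run directories are consecutive integers starting at 0, and
# enumeration stops at the first missing directory.
#
#   output/0/<sandwich_runner.TRACE_FILENAME>
#   output/1/<sandwich_runner.TRACE_FILENAME>
#
# benchmark_setup.json needs only the two keys read above:
#   {"cache_whitelist": [...], "url_resources": [...]}
import sandwich_misc

sandwich_misc.VerifyBenchmarkOutputDirectory(
    'benchmark_setup.json',  # hypothetical path
    'output')                # hypothetical path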
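When a comparison fails, _PrintUrlSetComparison() logs output shaped roughly as below (modulo the logging module's own prefixes; the URLs are made up):

 Cached resources are not matching.
 List of missing resources:
- https://example.com/missing.css
 List of unexpected resources:
+ https://example.com/unexpected.js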

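Finally, a sketch of chaining the last two helpers to check that a cache archive holds exactly the subresources observed during the runs; the paths are made up and the signatures are the ones defined above.

import sandwich_misc

# Flatten the {url -> [subresource URLs]} map into one reference set, then
# compare it against the keys stored in the cache archive.
subresources = sandwich_misc.ReadSubresourceMapFromBenchmarkOutput('output')
ref_urls = set()
for urls in subresources.itervalues():  # Python 2, like the rest of the file.
  ref_urls.update(urls)
sandwich_misc.ValidateCacheArchiveContent(ref_urls, 'cache.zip')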