Chromium Code Reviews

Unified Diff: tools/android/loading/sandwich_misc.py

Issue 1872313002: sandwich: Implement SandwichTaskBuilder (Closed)
Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addresses pasko's comments Created 4 years, 8 months ago
 # Copyright 2016 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import logging
+import json
+import os

+import chrome_cache
+import common_util
 from loading_trace import LoadingTrace
 from prefetch_view import PrefetchSimulationView
 from request_dependencies_lens import RequestDependencyLens
-from user_satisfied_lens import FirstContentfulPaintLens
+import sandwich_runner
 import wpr_backend


+# Do not prefetch anything.
+EMPTY_CACHE_DISCOVERER = 'empty-cache'
+
+# Prefetches everything to load fully from cache (impossible in practice).
+FULL_CACHE_DISCOVERER = 'full-cache'
+
 # Prefetches the first resource following the redirection chain.
 REDIRECTED_MAIN_DISCOVERER = 'redirected-main'

 # All resources which are fetched from the main document and their redirections.
 PARSER_DISCOVERER = 'parser'

 # Simulation of HTMLPreloadScanner on the main document and its redirections.
 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner'

 SUBRESOURCE_DISCOVERERS = set([
+    EMPTY_CACHE_DISCOVERER,
+    FULL_CACHE_DISCOVERER,
     REDIRECTED_MAIN_DISCOVERER,
     PARSER_DISCOVERER,
     HTML_PRELOAD_SCANNER_DISCOVERER
 ])
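[Editor's note, not part of the patch: callers are expected to pick one of these discoverer names and validate it against SUBRESOURCE_DISCOVERERS before simulating a prefetch, mirroring the assertion visible further down in this file. A minimal sketch, where the discoverer value is a hypothetical input:]

import sandwich_misc

discoverer = 'html-scanner'  # hypothetical input, e.g. from a command-line flag
assert discoverer in sandwich_misc.SUBRESOURCE_DISCOVERERS, \
    'unknown prefetch simulation {}'.format(discoverer)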


 def PatchWpr(wpr_archive_path):
   """Patches a WPR archive to get all resources into the HTTP cache and avoid
   invalidation and revalidations.

(... 44 unchanged lines skipped ...)
       'unknown prefetch simulation {}'.format(subresource_discoverer)

   # Load trace and related infos.
   logging.info('loading %s' % loading_trace_path)
   trace = LoadingTrace.FromJsonFile(loading_trace_path)
   dependencies_lens = RequestDependencyLens(trace)
   first_resource_request = trace.request_track.GetFirstResourceRequest()

   # Build the list of discovered requests according to the desired simulation.
   discovered_requests = []
-  if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
+  if subresource_discoverer == EMPTY_CACHE_DISCOVERER:
+    pass
+  elif subresource_discoverer == FULL_CACHE_DISCOVERER:
+    discovered_requests = trace.request_track.GetEvents()
+  elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER:
     discovered_requests = \
         [dependencies_lens.GetRedirectChain(first_resource_request)[-1]]
   elif subresource_discoverer == PARSER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests(
         first_resource_request, dependencies_lens)
   elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER:
     discovered_requests = PrefetchSimulationView.PreloadedRequests(
         first_resource_request, dependencies_lens, trace)
   else:
     assert False

   # Prune out data:// requests.
   whitelisted_urls = set()
   logging.info('white-listing %s' % first_resource_request.url)
-  whitelisted_urls.add(first_resource_request.url)
   for request in discovered_requests:
     # Work-around for requests whose protocol may be None, for a reason that
     # is not yet understood.
     # TODO(gabadie): Follow up on this with Clovis guys and possibly remove
     # this work-around.
     if not request.protocol:
       logging.warning('ignoring %s (no protocol)' % request.url)
       continue
     # Ignore data protocols.
     if not request.protocol.startswith('http'):
       continue
     logging.info('white-listing %s' % request.url)
     whitelisted_urls.add(request.url)
   return whitelisted_urls
+
+
+def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
+  """Compare URL sets and log the diffs.
+
+  Args:
+    ref_url_set: Set of reference urls.
+    url_set: Set of urls to compare to the reference.
+    url_set_name: The set name for logging purposes.
+  """
+  assert type(ref_url_set) == set
+  assert type(url_set) == set
+  if ref_url_set == url_set:
+    logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name))
+    return
+  logging.error(' %s are not matching.' % url_set_name)
+  logging.error(' List of missing resources:')
+  for url in ref_url_set.difference(url_set):
+    logging.error('- ' + url)
+  logging.error(' List of unexpected resources:')
+  for url in url_set.difference(ref_url_set):
+    logging.error('+ ' + url)
+
+
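[Editor's note, not part of the patch: to illustrate the logging contract above, a hypothetical call with two invented sets:]

_PrintUrlSetComparison(
    set(['http://e.com/', 'http://e.com/style.css']),  # reference
    set(['http://e.com/', 'http://e.com/app.js']),     # observed
    'example resources')
# Roughly logs, via logging.error:
#   example resources are not matching.
#   List of missing resources:
#   - http://e.com/style.css
#   List of unexpected resources:
#   + http://e.com/app.js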
+def _ListUrlRequests(trace, from_cache):
+  """Lists requested URLs from a trace.
+
+  Args:
+    trace: The trace.
+    from_cache:
+      None to list all requested urls;
pasko 2016/04/21 18:21:44  these rules are hard to remember, so the reader wo
gabadie 2016/04/22 14:16:42  Done.
+      True to list all requested urls served from cache;
+      False to list all requested urls not served from cache.
+
+  Returns:
+    set([str])
+  """
+  urls = set()
+  for request_event in trace.request_track.GetEvents():
+    if request_event.protocol is None:
+      continue
+    if not request_event.protocol.startswith('http'):
pasko 2016/04/21 18:21:44  This was not mentioned in the docstring. Does this
gabadie 2016/04/22 14:16:42  That's what this is for.
+      continue
+    if from_cache is not None and request_event.from_disk_cache != from_cache:
+      continue
+    urls.add(request_event.url)
+  return urls
+
+
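[Editor's note, not part of the patch: given the tri-state from_cache parameter documented above, the three views over one trace would be obtained as follows, assuming trace is an already-loaded LoadingTrace:]

all_urls = _ListUrlRequests(trace, None)       # every HTTP(S) request
cached_urls = _ListUrlRequests(trace, True)    # only requests served from cache
network_urls = _ListUrlRequests(trace, False)  # only requests not from cache
# cached_urls and network_urls partition all_urls, assuming from_disk_cache
# is always a plain bool on HTTP(S) request events.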
+def VerifyBenchmarkOutputDirectory(benchmark_setup_path,
+                                   benchmark_output_directory_path):
pasko 2016/04/21 18:21:44  Need to apply the same action as for ValidateCache
gabadie 2016/04/22 14:16:42  Acknowledged. But I don't want to block sandwich w
+  """Verifies that all runs inside the benchmark output directory worked as
+  expected.
+
+  Args:
+    benchmark_setup_path: Path of the JSON of the benchmark setup.
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+  """
+  benchmark_setup = json.load(open(benchmark_setup_path))
+  cache_whitelist = set(benchmark_setup['cache_whitelist'])
+  url_resources = set(benchmark_setup['url_resources'])
+
+  # Verify requests from traces.
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      logging.error('missing trace %s' % trace_path)
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    logging.info('verifying %s from %s' % (trace.url, trace_path))
+    _PrintUrlSetComparison(url_resources, _ListUrlRequests(trace, None),
+                           'All resources')
+    _PrintUrlSetComparison(url_resources.intersection(cache_whitelist),
+                           _ListUrlRequests(trace, True), 'Cached resources')
+    _PrintUrlSetComparison(url_resources.difference(cache_whitelist),
+                           _ListUrlRequests(trace, False),
+                           'Non cached resources')
+
+
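[Editor's note, not part of the patch: the function only reads two keys from the setup JSON, and expects runs in numbered subdirectories 0, 1, 2, ... of the output directory, each holding a sandwich_runner.TRACE_FILENAME trace. A minimal, hand-written example of what the file at benchmark_setup_path could contain, shown as the Python value json.load() would return, with invented URLs:]

benchmark_setup = {
    'cache_whitelist': ['http://e.com/', 'http://e.com/logo.png'],
    'url_resources': ['http://e.com/', 'http://e.com/logo.png',
                      'http://e.com/tracker.js'],
}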
+def ListResourceUrls(benchmark_output_directory_path):
pasko 2016/04/21 18:21:44  ReadSubresourceMapFromBenchmarkOutput(...)
gabadie 2016/04/22 14:16:42  Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06  Acknowledged.
+  """Lists all requested URLs per navigated URL.
pasko 2016/04/21 18:21:44  """Extracts a map URL-to-subresources for each nav
gabadie 2016/04/22 14:16:42  Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06  I will keep asking for modifications as many times
+
+  Args:
+    benchmark_output_directory_path: Path of the benchmark output directory to
+        verify.
+
+  Returns:
+    {url -> [URLs of sub-resources]}
+  """
+  url_subresources = {}
+  run_id = -1
+  while True:
+    run_id += 1
+    run_path = os.path.join(benchmark_output_directory_path, str(run_id))
+    if not os.path.isdir(run_path):
+      break
+    trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME)
+    if not os.path.isfile(trace_path):
+      continue
+    trace = LoadingTrace.FromJsonFile(trace_path)
+    if trace.url in url_subresources:
+      continue
+    logging.info('listing resources of %s from %s' % (trace.url, trace_path))
+    urls_set = set()
+    for request_event in trace.request_track.GetEvents():
+      if not request_event.protocol.startswith('http'):
+        continue
+      if request_event.url not in urls_set:
+        logging.info(' %s' % request_event.url)
+        urls_set.add(request_event.url)
+    url_subresources[trace.url] = [url for url in urls_set]
+  return url_subresources
+
+
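[Editor's note, not part of the patch: a sketch of the shape of the returned map, with a hypothetical output directory and invented URLs:]

url_subresources = ListResourceUrls('/tmp/benchmark-output')
# e.g.:
# {'http://e.com/': ['http://e.com/logo.png', 'http://e.com/app.js']}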
+def ValidateCacheArchiveContent(ref_urls, cache_archive_path):
pasko 2016/04/21 18:21:44  Producing log messages on error is insufficient -
gabadie 2016/04/22 14:16:42  I don't want to block sandwich workflow because of
pasko 2016/04/25 13:29:06  nit: In browser development the term XMLHttpReques
gabadie 2016/04/27 08:32:16  Acknowledged.
+  """Validates a cache archive's content.
+
+  Args:
+    ref_urls: Reference list of urls.
+    cache_archive_path: Path of the cache archive to validate.
+  """
+  logging.info('listing cached urls from %s' % cache_archive_path)
+  with common_util.TemporaryDirectory() as cache_directory:
+    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
+    cached_urls = \
+        chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
+  _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources')
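[Editor's note, not part of the patch: a plausible way to wire the two helpers above together, with hypothetical paths; one cache archive per navigated URL is assumed:]

resources_per_url = ListResourceUrls('/tmp/benchmark-output')
ValidateCacheArchiveContent(resources_per_url['http://e.com/'],
                            '/tmp/e.com-cache.zip')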