OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | 5 import logging |
6 import json | |
7 import os | |
6 | 8 |
9 import chrome_cache | |
10 import common_util | |
7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
10 from user_satisfied_lens import FirstContentfulPaintLens | 14 import sandwich_runner |
11 import wpr_backend | 15 import wpr_backend |
12 | 16 |
13 | 17 |
18 # Do not prefetch anything. | |
19 EMPTY_CACHE_DISCOVERER = 'empty-cache' | |
20 | |
21 # Prefetches everything to load fully from cache (impossible in practice). | |
22 FULL_CACHE_DISCOVERER = 'full-cache' | |
23 | |
14 # Prefetches the first resource following the redirection chain. | 24 # Prefetches the first resource following the redirection chain. |
15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 25 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
16 | 26 |
17 # All resources which are fetched from the main document and their redirections. | 27 # All resources which are fetched from the main document and their redirections. |
18 PARSER_DISCOVERER = 'parser' | 28 PARSER_DISCOVERER = 'parser' |
19 | 29 |
20 # Simulation of HTMLPreloadScanner on the main document and their redirections. | 30 # Simulation of HTMLPreloadScanner on the main document and their redirections. |
21 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' | 31 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
22 | 32 |
23 SUBRESOURCE_DISCOVERERS = set([ | 33 SUBRESOURCE_DISCOVERERS = set([ |
34 EMPTY_CACHE_DISCOVERER, | |
35 FULL_CACHE_DISCOVERER, | |
24 REDIRECTED_MAIN_DISCOVERER, | 36 REDIRECTED_MAIN_DISCOVERER, |
25 PARSER_DISCOVERER, | 37 PARSER_DISCOVERER, |
26 HTML_PRELOAD_SCANNER_DISCOVERER | 38 HTML_PRELOAD_SCANNER_DISCOVERER |
27 ]) | 39 ]) |
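The set above is what a requested discoverer name is validated against before a simulation runs; a minimal illustration of that check (the `chosen_discoverer` variable is hypothetical, mirroring the assert further down in this file):

```python
# Hypothetical caller-side check; chosen_discoverer is illustrative only.
chosen_discoverer = HTML_PRELOAD_SCANNER_DISCOVERER
assert chosen_discoverer in SUBRESOURCE_DISCOVERERS, \
    'unknown prefetch simulation {}'.format(chosen_discoverer)
```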
28 | 40 |
29 | 41 |
30 def PatchWpr(wpr_archive_path): | 42 def PatchWpr(wpr_archive_path): |
31 """Patches a WPR archive to get all resources into the HTTP cache and avoid | 43 """Patches a WPR archive to get all resources into the HTTP cache and avoid |
32 invalidation and revalidations. | 44 invalidation and revalidations. |
33 | 45 |
(...skipping 44 matching lines...) |
78 'unknown prefetch simulation {}'.format(subresource_discoverer) | 90 'unknown prefetch simulation {}'.format(subresource_discoverer) |
79 | 91 |
80 # Load trace and related infos. | 92 # Load trace and related infos. |
81 logging.info('loading %s' % loading_trace_path) | 93 logging.info('loading %s' % loading_trace_path) |
82 trace = LoadingTrace.FromJsonFile(loading_trace_path) | 94 trace = LoadingTrace.FromJsonFile(loading_trace_path) |
83 dependencies_lens = RequestDependencyLens(trace) | 95 dependencies_lens = RequestDependencyLens(trace) |
84 first_resource_request = trace.request_track.GetFirstResourceRequest() | 96 first_resource_request = trace.request_track.GetFirstResourceRequest() |
85 | 97 |
86 # Build the list of discovered requests according to the desired simulation. | 98 # Build the list of discovered requests according to the desired simulation. |
87 discovered_requests = [] | 99 discovered_requests = [] |
88 if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | 100 if subresource_discoverer == EMPTY_CACHE_DISCOVERER: |
101 pass | |
102 elif subresource_discoverer == FULL_CACHE_DISCOVERER: | |
103 discovered_requests = trace.request_track.GetEvents() | |
104 elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | |
89 discovered_requests = \ | 105 discovered_requests = \ |
90 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] | 106 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
91 elif subresource_discoverer == PARSER_DISCOVERER: | 107 elif subresource_discoverer == PARSER_DISCOVERER: |
92 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 108 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
93 first_resource_request, dependencies_lens) | 109 first_resource_request, dependencies_lens) |
94 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 110 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
95 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 111 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
96 first_resource_request, dependencies_lens, trace) | 112 first_resource_request, dependencies_lens, trace) |
97 else: | 113 else: |
98 assert False | 114 assert False |
99 | 115 |
100 # Prune out data:// requests. | 116 # Prune out data:// requests. |
101 whitelisted_urls = set() | 117 whitelisted_urls = set() |
102 logging.info('white-listing %s' % first_resource_request.url) | 118 logging.info('white-listing %s' % first_resource_request.url) |
103 whitelisted_urls.add(first_resource_request.url) | |
104 for request in discovered_requests: | 119 for request in discovered_requests: |
105 # Work-around where the protocol may be none for an unclear reason yet. | 120 # Work-around where the protocol may be none for an unclear reason yet. |
106 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove | 121 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove |
107 # this work-around. | 122 # this work-around. |
108 if not request.protocol: | 123 if not request.protocol: |
109 logging.warning('ignoring %s (no protocol)' % request.url) | 124 logging.warning('ignoring %s (no protocol)' % request.url) |
110 continue | 125 continue |
111 # Ignore data protocols. | 126 # Ignore data protocols. |
112 if not request.protocol.startswith('http'): | 127 if not request.protocol.startswith('http'): |
113 continue | 128 continue |
114 logging.info('white-listing %s' % request.url) | 129 logging.info('white-listing %s' % request.url) |
115 whitelisted_urls.add(request.url) | 130 whitelisted_urls.add(request.url) |
116 return whitelisted_urls | 131 return whitelisted_urls |
132 | |
133 | |
134 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | |
135 """Compare URL sets and log the diffs. | |
136 | |
137 Args: | |
138 ref_url_set: Set of reference urls. | |
139 url_set: Set of urls to compare to the reference. | |
140 url_set_name: The set name for logging purposes. | |
141 """ | |
142 assert type(ref_url_set) == set | |
143 assert type(url_set) == set | |
144 if ref_url_set == url_set: | |
145 logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name)) | |
146 return | |
147 logging.error(' %s are not matching.' % url_set_name) | |
148 logging.error(' List of missing resources:') | |
149 for url in ref_url_set.difference(url_set): | |
150 logging.error('- ' + url) | |
151 logging.error(' List of unexpected resources:') | |
152 for url in url_set.difference(ref_url_set): | |
153 logging.error('+ ' + url) | |
154 | |
155 | |
156 def _ListUrlRequests(trace, from_cache): | |
157 """Lists requested URLs from a trace. | |
158 | |
159 Args: | |
160 trace: The trace. | |
161 from_cache: | |
162 None to list all requested urls; | |
pasko 2016/04/21 18:21:44: these rules are hard to remember, so the reader wo
gabadie 2016/04/22 14:16:42: Done.
| |
163 True to list all requested urls served from cache; | |
164 False to list all requested urls not served from cache. |
165 | |
166 Returns: | |
167 set([str]) | |
168 """ | |
169 urls = set() | |
170 for request_event in trace.request_track.GetEvents(): | |
171 if request_event.protocol == None: | |
172 continue | |
173 if not request_event.protocol.startswith('http'): | |
pasko 2016/04/21 18:21:44: This was not mentioned in the docstring. Does this
gabadie 2016/04/22 14:16:42: This is what this is for.
| |
174 continue | |
175 if from_cache is not None and request_event.from_disk_cache != from_cache: | |
176 continue | |
177 urls.add(request_event.url) | |
178 return urls | |
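Since the three-way `from_cache` switch drew a reviewer question above, here is a minimal usage sketch of the three modes (the benchmark output path is hypothetical):

```python
# Sketch only: the output directory path below is made up.
trace = LoadingTrace.FromJsonFile(
    os.path.join('/tmp/benchmark-output', '0', sandwich_runner.TRACE_FILENAME))
all_urls = _ListUrlRequests(trace, from_cache=None)   # every HTTP(S) request
cached = _ListUrlRequests(trace, from_cache=True)     # served from the disk cache
uncached = _ListUrlRequests(trace, from_cache=False)  # not served from the disk cache
```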
179 | |
180 | |
181 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, | |
pasko 2016/04/21 18:21:44: Need to apply the same action as for ValidateCache
gabadie 2016/04/22 14:16:42: Acknowledged. But I don't want to block sandwich w
| |
182 benchmark_output_directory_path): | |
183 """Verifies that all runs inside the run_output_directory worked as expected. |
184 | |
185 Args: | |
186 benchmark_setup_path: Path of the JSON of the benchmark setup. | |
187 benchmark_output_directory_path: Path of the benchmark output directory to | |
188 verify. | |
189 """ | |
190 benchmark_setup = json.load(open(benchmark_setup_path)) | |
191 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
192 url_resources = set(benchmark_setup['url_resources']) | |
193 | |
194 # Verify requests from traces. | |
195 run_id = -1 | |
196 while True: | |
197 run_id += 1 | |
198 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
199 if not os.path.isdir(run_path): | |
200 break | |
201 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | |
202 if not os.path.isfile(trace_path): | |
203 logging.error('missing trace %s' % trace_path) | |
204 continue | |
205 trace = LoadingTrace.FromJsonFile(trace_path) | |
206 logging.info('verifying %s from %s' % (trace.url, trace_path)) | |
207 _PrintUrlSetComparison(url_resources, _ListUrlRequests(trace, None), | |
208 'All resources') | |
209 _PrintUrlSetComparison(url_resources.intersection(cache_whitelist), | |
210 _ListUrlRequests(trace, True), 'Cached resources') | |
211 _PrintUrlSetComparison(url_resources.difference(cache_whitelist), | |
212 _ListUrlRequests(trace, False), | |
213 'Non cached resources') | |
214 | |
215 | |
216 def ListResourceUrls(benchmark_output_directory_path): | |
pasko 2016/04/21 18:21:44: ReadSubresourceMapFromBenchmarkOutput(...)
gabadie 2016/04/22 14:16:42: Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06: Acknowledged.
| |
217 """Lists all requested URLs per navigated URL. |
pasko 2016/04/21 18:21:44: """Extracts a map URL-to-subresources for each nav
gabadie 2016/04/22 14:16:42: Second time you ask me for modification! Done.
pasko 2016/04/25 13:29:06: I will keep asking for modifications as many times
| |
218 | |
219 Args: | |
220 benchmark_output_directory_path: Path of the benchmark output directory to | |
221 verify. | |
222 | |
223 Returns: | |
224 {url -> [URLs of sub-resources]} | |
225 """ | |
226 url_subresources = {} | |
227 run_id = -1 | |
228 while True: | |
229 run_id += 1 | |
230 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
231 if not os.path.isdir(run_path): | |
232 break | |
233 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | |
234 if not os.path.isfile(trace_path): | |
235 continue | |
236 trace = LoadingTrace.FromJsonFile(trace_path) | |
237 if trace.url in url_subresources: | |
238 continue | |
239 logging.info('lists resources of %s from %s' % (trace.url, trace_path)) | |
240 urls_set = set() | |
241 for request_event in trace.request_track.GetEvents(): | |
242 if not request_event.protocol.startswith('http'): | |
243 continue | |
244 if request_event.url not in urls_set: | |
245 logging.info(' %s' % request_event.url) | |
246 urls_set.add(request_event.url) | |
247 url_subresources[trace.url] = [url for url in urls_set] | |
248 return url_subresources | |
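For reference, the map returned above keys each navigated URL to its deduplicated subresource URLs; an illustrative (made-up) example of the shape:

```python
# Illustrative return shape only; these URLs are invented.
url_subresources = {
    'https://example.com/': [
        'https://example.com/style.css',
        'https://static.example.com/app.js',
    ],
}
```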
249 | |
250 | |
251 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): | |
pasko 2016/04/21 18:21:44: Producing log messages on error is insufficient -
gabadie 2016/04/22 14:16:42: I don't want to block sandwich workflow because of
pasko 2016/04/25 13:29:06: nit: In browser development the term XMLHttpReques
gabadie 2016/04/27 08:32:16: Acknowledged.
| |
252 """Validates a cache archive content. | |
253 | |
254 Args: | |
255 ref_urls: Reference list of urls. | |
256 cache_archive_path: Cache archive's path to validate. | |
257 """ | |
258 logging.info('lists cached urls from %s' % cache_archive_path) | |
259 with common_util.TemporaryDirectory() as cache_directory: | |
260 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | |
261 cached_urls = \ | |
262 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() | |
263 _PrintUrlSetComparison(set(ref_urls), set(cached_urls), 'cached resources') | |
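Picking up pasko's point that log-only validation is easy to overlook, one possible follow-up (a sketch under the assumption that failing hard is acceptable, not something this CL implements) would be a strict variant that surfaces mismatches to the caller:

```python
# Hypothetical strict validation; _UrlSetsMatch and
# ValidateCacheArchiveContentStrict are not part of this CL.
def _UrlSetsMatch(ref_url_set, url_set, url_set_name):
  """Logs the diff like _PrintUrlSetComparison, but also returns the verdict."""
  missing = ref_url_set - url_set
  unexpected = url_set - ref_url_set
  if not missing and not unexpected:
    logging.info('  %d %s are matching.', len(ref_url_set), url_set_name)
    return True
  logging.error('  %s are not matching.', url_set_name)
  for url in missing:
    logging.error('- %s', url)
  for url in unexpected:
    logging.error('+ %s', url)
  return False


def ValidateCacheArchiveContentStrict(ref_urls, cache_archive_path):
  """Raises on mismatch instead of only logging (hypothetical variant)."""
  with common_util.TemporaryDirectory() as cache_directory:
    chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory)
    cached_urls = chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()
  if not _UrlSetsMatch(set(ref_urls), set(cached_urls), 'cached resources'):
    raise RuntimeError('cache archive mismatch: {}'.format(cache_archive_path))
```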