Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import logging | 5 import logging |
| 6 import json | |
| 7 import os | |
| 6 | 8 |
| 9 import chrome_cache | |
| 10 import common_util | |
| 7 from loading_trace import LoadingTrace | 11 from loading_trace import LoadingTrace |
| 8 from prefetch_view import PrefetchSimulationView | 12 from prefetch_view import PrefetchSimulationView |
| 9 from request_dependencies_lens import RequestDependencyLens | 13 from request_dependencies_lens import RequestDependencyLens |
| 10 from user_satisfied_lens import FirstContentfulPaintLens | |
| 11 import wpr_backend | 14 import wpr_backend |
| 12 | 15 |
| 13 | 16 |
| 17 # Do not prefetch anything. | |
| 18 NO_DISCOVERER = 'no-discoverer' | |
| 19 | |
| 20 # Prefetches everything to load fully from cache (impossible in practice). | |
| 21 FULL_CACHE_DISCOVERER = 'full-cache' | |
| 22 | |
| 14 # Prefetches the first resource following the redirection chain. | 23 # Prefetches the first resource following the redirection chain. |
| 15 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' | 24 REDIRECTED_MAIN_DISCOVERER = 'redirected-main' |
| 16 | 25 |
| 17 # All resources which are fetched from the main document and their redirections. | 26 # All resources which are fetched from the main document and their redirections. |
| 18 PARSER_DISCOVERER = 'parser', | 27 PARSER_DISCOVERER = 'parser' |
| 19 | 28 |
| 20 # Simulation of HTMLPreloadScanner on the main document and their redirections. | 29 # Simulation of HTMLPreloadScanner on the main document and their redirections. |
| 21 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner', | 30 HTML_PRELOAD_SCANNER_DISCOVERER = 'html-scanner' |
| 22 | 31 |
| 23 SUBRESOURCE_DISCOVERERS = set([ | 32 SUBRESOURCE_DISCOVERERS = set([ |
| 33 NO_DISCOVERER, | |
| 34 FULL_CACHE_DISCOVERER, | |
| 24 REDIRECTED_MAIN_DISCOVERER, | 35 REDIRECTED_MAIN_DISCOVERER, |
| 25 PARSER_DISCOVERER, | 36 PARSER_DISCOVERER, |
| 26 HTML_PRELOAD_SCANNER_DISCOVERER | 37 HTML_PRELOAD_SCANNER_DISCOVERER |
| 27 ]) | 38 ]) |
| 28 | 39 |
| 29 | 40 |
| 30 def PatchWpr(wpr_archive_path): | 41 def PatchWpr(wpr_archive_path): |
| 31 """Patches a WPR archive to get all resources into the HTTP cache and avoid | 42 """Patches a WPR archive to get all resources into the HTTP cache and avoid |
| 32 invalidation and revalidations. | 43 invalidation and revalidations. |
| 33 | 44 |
| (...skipping 44 matching lines...) | |
| 78 'unknown prefetch simulation {}'.format(subresource_discoverer) | 89 'unknown prefetch simulation {}'.format(subresource_discoverer) |
| 79 | 90 |
| 80 # Load trace and related infos. | 91 # Load trace and related infos. |
| 81 logging.info('loading %s' % loading_trace_path) | 92 logging.info('loading %s' % loading_trace_path) |
| 82 trace = LoadingTrace.FromJsonFile(loading_trace_path) | 93 trace = LoadingTrace.FromJsonFile(loading_trace_path) |
| 83 dependencies_lens = RequestDependencyLens(trace) | 94 dependencies_lens = RequestDependencyLens(trace) |
| 84 first_resource_request = trace.request_track.GetFirstResourceRequest() | 95 first_resource_request = trace.request_track.GetFirstResourceRequest() |
| 85 | 96 |
| 86 # Build the list of discovered requests according to the desired simulation. | 97 # Build the list of discovered requests according to the desired simulation. |
| 87 discovered_requests = [] | 98 discovered_requests = [] |
| 88 if subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | 99 if subresource_discoverer == NO_DISCOVERER: |
| 100 pass | |
| 101 elif subresource_discoverer == FULL_CACHE_DISCOVERER: | |
| 102 discovered_requests = trace.request_track.GetEvents() | |
| 103 elif subresource_discoverer == REDIRECTED_MAIN_DISCOVERER: | |
| 89 discovered_requests = \ | 104 discovered_requests = \ |
| 90 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] | 105 [dependencies_lens.GetRedirectChain(first_resource_request)[-1]] |
| 91 elif subresource_discoverer == PARSER_DISCOVERER: | 106 elif subresource_discoverer == PARSER_DISCOVERER: |
| 92 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 107 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 93 first_resource_request, dependencies_lens) | 108 first_resource_request, dependencies_lens) |
| 94 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 109 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 95 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 110 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 96 first_resource_request, dependencies_lens, trace) | 111 first_resource_request, dependencies_lens, trace) |
| 97 else: | 112 else: |
| 98 assert False | 113 assert False |
| 99 | 114 |
| 100 # Prune out data:// requests. | 115 # Prune out data:// requests. |
| 101 whitelisted_urls = set() | 116 whitelisted_urls = set() |
| 102 logging.info('white-listing %s' % first_resource_request.url) | 117 logging.info('white-listing %s' % first_resource_request.url) |
| 103 whitelisted_urls.add(first_resource_request.url) | |
| 104 for request in discovered_requests: | 118 for request in discovered_requests: |
| 105 # Work-around where the protocol may be none for an unclear reason yet. | 119 # Work-around where the protocol may be none for an unclear reason yet. |
| 106 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove | 120 # TODO(gabadie): Follow up on this with Clovis guys and possibly remove |
| 107 # this work-around. | 121 # this work-around. |
| 108 if not request.protocol: | 122 if not request.protocol: |
| 109 logging.warning('ignoring %s (no protocol)' % request.url) | 123 logging.warning('ignoring %s (no protocol)' % request.url) |
| 110 continue | 124 continue |
| 111 # Ignore data protocols. | 125 # Ignore data protocols. |
| 112 if not request.protocol.startswith('http'): | 126 if not request.protocol.startswith('http'): |
| 113 continue | 127 continue |
| 114 logging.info('white-listing %s' % request.url) | 128 logging.info('white-listing %s' % request.url) |
| 115 whitelisted_urls.add(request.url) | 129 whitelisted_urls.add(request.url) |
| 116 return whitelisted_urls | 130 return whitelisted_urls |
| 131 | |
| 132 | |
| 133 def CompareUrlSet(ref_url_set, url_set, url_set_name, debug_hint='Good luck!'): | |
|
pasko (2016/04/18 09:36:12): I find this debug hint to be slightly offensive :)
pasko (2016/04/18 09:36:13): why is this function in sandwich_misc? is it going...
pasko (2016/04/18 09:36:13): more intuitive name: PrintUrlSetComparison. A func...
pasko (2016/04/18 09:36:13): not used outside sandwich_misc -> _Compare...
gabadie (2016/04/19 17:39:48): Done.
gabadie (2016/04/19 17:39:48): Not sure to understand. Used only in this file.
gabadie (2016/04/19 17:39:48): Done.
gabadie (2016/04/19 17:39:49): Done.
| 134 """Compare URL sets | |
| 135 | |
|
pasko (2016/04/18 09:36:12): need to explain what the function prints
gabadie (2016/04/19 17:39:49): Done.
| 136 Args: | |
| 137 ref_url_set: Set of reference urls. | |
| 138 url_set: Set of urls to compare to the reference. | |
| 139 url_set_name: The set name for logging purposes. | |
| 140 debug_hint: A debug hint to help debugging in any case the sets are | |
| 141 different. | |
| 142 """ | |
| 143 assert type(ref_url_set) == set | |
| 144 assert type(url_set) == set | |
| 145 if ref_url_set == url_set: | |
| 146 logging.info(' %d %s are matching.' % (len(ref_url_set), url_set_name)) | |
|
pasko (2016/04/18 09:36:13): why % formatting here and {} formatting in other p...
gabadie (2016/04/19 17:39:49): There is an annoying check in pylint to force the...
| 147 return | |
| 148 logging.error(' %s are not matching.' % url_set_name) | |
| 149 logging.error(' Hint: ' + debug_hint) | |
| 150 logging.error(' List of missing resources:') | |
| 151 for url in ref_url_set.difference(url_set): | |
| 152 logging.error('- ' + url) | |
| 153 logging.error(' List of unexpected resources:') | |
| 154 for url in url_set.difference(ref_url_set): | |
| 155 logging.error('+ ' + url) | |
| 156 | |
| 157 | |
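A side note on the % vs {} formatting thread above: gabadie is most likely referring to pylint's logging checks. logging-format-interpolation flags str.format() inside logging calls, while logging-not-lazy flags eager % interpolation; only the lazy form passes both. A minimal standalone sketch (the path value is made up for illustration, not taken from the CL):

    import logging

    trace_path = '/tmp/trace.json'  # illustrative value only

    # Flagged by logging-format-interpolation: formatting done with str.format().
    logging.info('loading {}'.format(trace_path))

    # Flagged by logging-not-lazy: the string is built even if the record is
    # filtered out before being emitted.
    logging.info('loading %s' % trace_path)

    # Preferred: pass arguments and let the logging module interpolate lazily.
    logging.info('loading %s', trace_path)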
| 158 def _ListUrlRequests(trace, from_cache=None): | |
|
pasko (2016/04/18 09:36:13): default arguments that can be True, False and None...
gabadie (2016/04/19 17:39:48): Done.
| 159 urls = set() | |
| 160 for request_event in trace.request_track.GetEvents(): | |
| 161 if request_event.protocol == None: | |
| 162 continue | |
| 163 if not request_event.protocol.startswith('http'): | |
| 164 continue | |
| 165 if from_cache is not None and request_event.from_disk_cache != from_cache: | |
| 166 continue | |
| 167 urls.add(request_event.url) | |
| 168 return urls | |
| 169 | |
| 170 | |
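On pasko's comment above that arguments which can be True, False and None are hard to read: one conventional fix is to give the three states names and pass the argument by keyword, which also addresses the later request for an explicit second argument to _ListUrlRequests. A hypothetical sketch of that pattern, not code from this CL:

    # Hypothetical names for the tri-state from_cache filter.
    CACHED_ONLY = True       # keep only requests served from the disk cache
    UNCACHED_ONLY = False    # keep only requests that went to the network
    ALL_REQUESTS = None      # do not filter on cache state

    # Call sites then document themselves instead of passing a bare literal:
    #   _ListUrlRequests(trace, from_cache=ALL_REQUESTS)
    #   _ListUrlRequests(trace, from_cache=CACHED_ONLY)
    #   _ListUrlRequests(trace, from_cache=UNCACHED_ONLY)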
| 171 def VerifyBenchmarkOutputDirectory(benchmark_setup_path, | |
| 172 benchmark_output_directory_path): | |
| 173 """Verifies that all run inside the run_output_directory worked as expected. | |
| 174 | |
| 175 Args: | |
| 176 benchmark_setup_path: Path of the JSON of the benchmark setup. | |
|
pasko (2016/04/18 09:36:13): what is a 'benchmark setup'?
gabadie (2016/04/19 17:39:48): See SetupBenchmark in sandwich_task_builder.py
| 177 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 178 verify. | |
| 179 """ | |
| 180 benchmark_setup = json.load(open(benchmark_setup_path)) | |
| 181 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
| 182 url_resources = set(benchmark_setup['url_resources']) | |
| 183 | |
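To illustrate pasko's question above about the 'benchmark setup': the authoritative format is produced by SetupBenchmark in sandwich_task_builder.py; the sketch below shows only the two keys this function reads, with made-up URLs:

    import json

    # Made-up content covering only the keys consumed here
    # ('cache_whitelist' and 'url_resources').
    example_setup = {
        'cache_whitelist': [
            'https://example.com/',
            'https://example.com/style.css',
        ],
        'url_resources': [
            'https://example.com/',
            'https://example.com/style.css',
            'https://example.com/app.js',
        ],
    }

    with open('benchmark_setup.json', 'w') as setup_file:
        json.dump(example_setup, setup_file, indent=2)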
| 184 # Verify requests from traces. | |
| 185 run_id = -1 | |
| 186 while True: | |
| 187 run_id += 1 | |
| 188 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
| 189 if not os.path.isdir(run_path): | |
| 190 break | |
| 191 trace_path = os.path.join(run_path, 'trace.json') | |
|
pasko (2016/04/18 09:36:13): 'trace.json' exists in many files, consider making...
gabadie (2016/04/19 17:39:48): Nop. trace.json is sandwich specific. I don't see...
| 192 if not os.path.isfile(trace_path): | |
| 193 logging.error('missing trace %s' % trace_path) | |
| 194 continue | |
| 195 trace = LoadingTrace.FromJsonFile(trace_path) | |
| 196 logging.info('verifying %s from %s' % (trace.url, trace_path)) | |
| 197 CompareUrlSet(url_resources, _ListUrlRequests(trace), 'All resources', | |
|
pasko (2016/04/18 09:36:13): explicit second arg for _ListUrlRequests is needed
gabadie (2016/04/19 17:39:48): Done.
| 198 'You may have an issue with an AJAX requests.') | |
|
pasko (2016/04/18 09:36:13): I think one can easily tell this by looking at URL...
gabadie (2016/04/19 17:39:48): Done.
| 199 CompareUrlSet(url_resources.intersection(cache_whitelist), | |
| 200 _ListUrlRequests(trace, True), 'Cached resources', | |
| 201 'The WPR archive patcher may have an invalidation issue.') | |
|
pasko (2016/04/18 09:36:13): It's slightly funny to see gabadie@ providing hint...
gabadie (2016/04/19 17:39:48): The point has to give hint to user who wanted to r...
| 202 CompareUrlSet(url_resources.difference(cache_whitelist), | |
| 203 _ListUrlRequests(trace, False), 'Non cached resources') | |
| 204 | |
| 205 | |
| 206 def ListResourcesUrls(benchmark_output_directory_path): | |
|
pasko (2016/04/18 09:36:13): ListResourceUrls
gabadie (2016/04/19 17:39:48): Done.
| 207 """Lists all requested urls per navigated urls | |
|
pasko (2016/04/18 09:36:12): s/urls/URLs/
gabadie (2016/04/19 17:39:48): Done.
| 208 | |
| 209 Args: | |
| 210 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 211 verify. | |
| 212 | |
| 213 Returns: | |
| 214 {url -> [urls of sub-resources]} | |
| 215 """ | |
| 216 url_subresources = {} | |
| 217 run_id = -1 | |
| 218 while True: | |
| 219 run_id += 1 | |
| 220 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | |
| 221 if not os.path.isdir(run_path): | |
| 222 break | |
| 223 trace_path = os.path.join(run_path, 'trace.json') | |
| 224 if not os.path.isfile(trace_path): | |
| 225 continue | |
| 226 trace = LoadingTrace.FromJsonFile(trace_path) | |
| 227 if trace.url in url_subresources: | |
| 228 continue | |
| 229 logging.info('lists resources of %s from %s' % (trace.url, trace_path)) | |
| 230 urls_set = set() | |
| 231 for request_event in trace.request_track.GetEvents(): | |
| 232 if not request_event.protocol.startswith('http'): | |
| 233 continue | |
| 234 if request_event.url not in urls_set: | |
| 235 logging.info(' %s' % request_event.url) | |
| 236 urls_set.add(request_event.url) | |
| 237 url_subresources[trace.url] = [url for url in urls_set] | |
| 238 return url_subresources | |
| 239 | |
| 240 | |
| 241 def ValidateCacheArchiveContent(ref_urls, cache_archive_path): | |
| 242 """Validates a cache archive content. | |
| 243 | |
| 244 Args: | |
| 245 ref_urls: Reference list of urls. | |
| 246 cache_archive_path: Cache archive's path to validate. | |
| 247 """ | |
| 248 logging.info('lists cached urls from %s' % cache_archive_path) | |
| 249 with common_util.TemporaryDirectory() as cache_directory: | |
| 250 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | |
| 251 cached_urls = \ | |
| 252 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys() | |
| 253 CompareUrlSet(set(ref_urls), set(cached_urls), 'cached resources', | |
| 254 debug_hint='Looks like a response header needs to be patched.') | |
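For orientation, a hypothetical way the verification helpers above could be driven; the paths and URL list are illustrative only and do not come from this CL:

    # Illustrative inputs; real values come from the sandwich benchmark tasks.
    VerifyBenchmarkOutputDirectory('out/sandwich/benchmark_setup.json',
                                   'out/sandwich/benchmark_output')

    reference_urls = ['https://example.com/', 'https://example.com/app.js']
    ValidateCacheArchiveContent(reference_urls, 'out/sandwich/cache.zip')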