OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import csv | 5 import csv |
pasko 2016/06/03 17:13:46:
Let's add a top-level comment like:
# Implements a
gabadie 2016/06/06 09:43:15: Done.
6 import logging | 6 import logging |
7 import json | 7 import json |
8 import os | 8 import os |
9 import re | 9 import re |
10 import shutil | 10 import shutil |
11 from urlparse import urlparse | 11 from urlparse import urlparse |
12 | 12 |
13 import chrome_cache | 13 import chrome_cache |
14 import common_util | 14 import common_util |
15 import loading_trace | 15 import loading_trace |
(...skipping 175 matching lines...)
191 elif subresource_discoverer == PARSER_DISCOVERER: | 191 elif subresource_discoverer == PARSER_DISCOVERER: |
192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
193 first_resource_request, dependencies_lens) | 193 first_resource_request, dependencies_lens) |
194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
195 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 195 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
196 first_resource_request, dependencies_lens, trace) | 196 first_resource_request, dependencies_lens, trace) |
197 else: | 197 else: |
198 assert False | 198 assert False |
199 | 199 |
200 whitelisted_urls = set() | 200 whitelisted_urls = set() |
201 logging.info('white-listing %s' % first_resource_request.url) | |
202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): | 201 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
203 logging.info('white-listing %s' % request.url) | 202 logging.debug('white-listing %s', request.url) |
204 whitelisted_urls.add(request.url) | 203 whitelisted_urls.add(request.url) |
204 logging.info('number of white-listed resources: %d', len(whitelisted_urls)) | |
205 return whitelisted_urls | 205 return whitelisted_urls |
206 | 206 |
207 | 207 |
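For orientation, the whitelist extraction above is driven with a trace path and a discoverer name; a minimal usage sketch (the trace path is invented, and `HTML_PRELOAD_SCANNER_DISCOVERER` is one of the `SUBRESOURCE_DISCOVERERS` defined earlier in this file):

```python
# Sketch: build a cache whitelist from the trace recorded while grabbing the
# original cache. The trace path here is hypothetical.
whitelisted_urls = _ExtractDiscoverableUrls(
    '/tmp/original-cache-run/0/trace.json', HTML_PRELOAD_SCANNER_DISCOVERER)
logging.info('number of white-listed resources: %d', len(whitelisted_urls))
```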
208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | 208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
209 """Compare URL sets and log the diffs. | 209 """Compare URL sets and log the diffs. |
210 | 210 |
211 Args: | 211 Args: |
212 ref_url_set: Set of reference urls. | 212 ref_url_set: Set of reference urls. |
213 url_set: Set of urls to compare to the reference. | 213 url_set: Set of urls to compare to the reference. |
214 url_set_name: The set name for logging purposes. | 214 url_set_name: The set name for logging purposes. |
(...skipping 39 matching lines...)
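The body of `_PrintUrlSetComparison` is collapsed by the review viewer; a plausible reconstruction of such a set-diff logger, given only to make the later call sites readable (the log format and messages are assumptions, not the hidden original):

```python
def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
  # Assumed implementation: log the URLs missing from, and unexpected in,
  # url_set relative to ref_url_set.
  missing_urls = ref_url_set.difference(url_set)
  unexpected_urls = url_set.difference(ref_url_set)
  if not missing_urls and not unexpected_urls:
    logging.info('%s: no diff (%d urls)', url_set_name, len(url_set))
    return
  for url in missing_urls:
    logging.error('%s: missing %s', url_set_name, url)
  for url in unexpected_urls:
    logging.error('%s: unexpected %s', url_set_name, url)
```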
254 request_event.method.upper().strip() == 'POST'): | 254 request_event.method.upper().strip() == 'POST'): |
255 urls.add(request_event.url) | 255 urls.add(request_event.url) |
256 elif (request_kind == _RequestOutcome.NotServedFromCache and | 256 elif (request_kind == _RequestOutcome.NotServedFromCache and |
257 not request_event.from_disk_cache): | 257 not request_event.from_disk_cache): |
258 urls.add(request_event.url) | 258 urls.add(request_event.url) |
259 elif request_kind == _RequestOutcome.All: | 259 elif request_kind == _RequestOutcome.All: |
260 urls.add(request_event.url) | 260 urls.add(request_event.url) |
261 return urls | 261 return urls |
262 | 262 |
263 | 263 |
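Only the tail of `_ListUrlRequests` survives the collapsed region above; for context, a sketch of the complete filter, assuming `_RequestOutcome` is a simple enum and that the `ServedFromCache` branch mirrors the visible `NotServedFromCache` one:

```python
def _ListUrlRequests(trace, request_kind):
  # Sketch reconstructed from the branches visible in the diff above.
  urls = set()
  for request_event in _FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    if (request_kind == _RequestOutcome.ServedFromCache and
        request_event.from_disk_cache):
      urls.add(request_event.url)
    elif (request_kind == _RequestOutcome.Post and
          request_event.method.upper().strip() == 'POST'):
      urls.add(request_event.url)
    elif (request_kind == _RequestOutcome.NotServedFromCache and
          not request_event.from_disk_cache):
      urls.add(request_event.url)
    elif request_kind == _RequestOutcome.All:
      urls.add(request_event.url)
  return urls
```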
264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, | 264 class _RunOutputVerifier(object): |
265 benchmark_output_directory_path): | 265 """Object to verify benchmark run from traces and WPR log stored in the |
266 """Verifies that all run inside the run_output_directory worked as expected. | 266 runner output directory. |
267 """ | |
267 | 268 |
268 Args: | 269 def __init__(self, cache_validation_result, benchmark_setup): |
269 benchmark_setup_path: Path of the JSON of the benchmark setup. | 270 """Constructor. |
270 benchmark_output_directory_path: Path of the benchmark output directory to | |
271 verify. | |
272 """ | |
273 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
274 benchmark_setup = json.load(open(benchmark_setup_path)) | |
275 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
276 original_requests = set(benchmark_setup['url_resources']) | |
277 original_cached_requests = original_requests.intersection(cache_whitelist) | |
278 original_uncached_requests = original_requests.difference(cache_whitelist) | |
279 all_sent_url_requests = set() | |
280 | 271 |
281 # Verify requests from traces. | 272 Args: |
282 run_id = -1 | 273 cache_validation_result: JSON of the cache validation task. |
283 while True: | 274 benchmark_setup: JSON of the benchmark setup. |
284 run_id += 1 | 275 """ |
285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | 276 self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
286 if not os.path.isdir(run_path): | 277 self._original_requests = set(cache_validation_result['effective_requests']) |
287 break | 278 self._original_post_requests = set( |
288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | 279 cache_validation_result['effective_post_requests']) |
289 if not os.path.isfile(trace_path): | 280 self._original_cached_requests = self._original_requests.intersection( |
290 logging.error('missing trace %s' % trace_path) | 281 self._cache_whitelist) |
291 continue | 282 self._original_uncached_requests = self._original_requests.difference( |
292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 283 self._cache_whitelist) |
293 logging.info('verifying %s from %s' % (trace.url, trace_path)) | 284 self._all_sent_url_requests = set() |
294 | 285 |
286 def VerifyTrace(self, trace): | |
287 """Verifies a trace with the cache validation result and the benchmark | |
288 setup. | |
289 """ | |
295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 290 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 291 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
297 effective_cached_requests = \ | 292 effective_cached_requests = \ |
298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) | 293 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) |
299 effective_uncached_requests = \ | 294 effective_uncached_requests = \ |
300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) | 295 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) |
301 | 296 |
302 missing_requests = original_requests.difference(effective_requests) | 297 missing_requests = self._original_requests.difference(effective_requests) |
303 unexpected_requests = effective_requests.difference(original_requests) | 298 unexpected_requests = effective_requests.difference(self._original_requests) |
304 expected_cached_requests = \ | 299 expected_cached_requests = \ |
305 original_cached_requests.difference(missing_requests) | 300 self._original_cached_requests.difference(missing_requests) |
306 missing_cached_requests = \ | 301 expected_uncached_requests = self._original_uncached_requests.union( |
307 expected_cached_requests.difference(effective_cached_requests) | 302 unexpected_requests).difference(missing_requests) |
308 expected_uncached_requests = original_uncached_requests.union( | |
309 unexpected_requests).union(missing_cached_requests) | |
310 all_sent_url_requests.update(effective_uncached_requests) | |
311 | 303 |
312 # POST requests are known to be unable to use the cache. | 304 # POST requests are known to be unable to use the cache. |
313 expected_cached_requests.difference_update(effective_post_requests) | 305 expected_cached_requests.difference_update(effective_post_requests) |
314 expected_uncached_requests.update(effective_post_requests) | 306 expected_uncached_requests.update(effective_post_requests) |
315 | 307 |
316 _PrintUrlSetComparison(original_requests, effective_requests, | 308 _PrintUrlSetComparison(self._original_requests, effective_requests, |
317 'All resources') | 309 'All resources') |
318 _PrintUrlSetComparison(set(), effective_post_requests, | 310 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
319 'POST resources') | |
320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, | 311 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
321 'Cached resources') | 312 'Cached resources') |
322 _PrintUrlSetComparison(expected_uncached_requests, | 313 _PrintUrlSetComparison(expected_uncached_requests, |
323 effective_uncached_requests, 'Non cached resources') | 314 effective_uncached_requests, 'Non cached resources') |
324 | 315 |
325 # Verify requests from WPR. | 316 self._all_sent_url_requests.update(effective_uncached_requests) |
326 wpr_log_path = os.path.join( | |
327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME) | |
328 logging.info('verifying requests from %s' % wpr_log_path) | |
329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) | |
330 all_wpr_urls = set() | |
331 unserved_wpr_urls = set() | |
332 wpr_command_colliding_urls = set() | |
333 | 317 |
334 for request in all_wpr_requests: | 318 def VerifyWprLog(self, wpr_log_path): |
335 if request.is_wpr_host: | 319 """Verifies WPR log with previously verified traces.""" |
336 continue | 320 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
337 if urlparse(request.url).path.startswith('/web-page-replay'): | 321 all_wpr_urls = set() |
338 wpr_command_colliding_urls.add(request.url) | 322 unserved_wpr_urls = set() |
339 elif request.is_served is False: | 323 wpr_command_colliding_urls = set() |
340 unserved_wpr_urls.add(request.url) | |
341 all_wpr_urls.add(request.url) | |
342 | 324 |
343 _PrintUrlSetComparison(set(), unserved_wpr_urls, | 325 for request in all_wpr_requests: |
344 'Distinct unserved resources from WPR') | 326 if request.is_wpr_host: |
345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, | 327 continue |
346 'Distinct resources colliding to WPR commands') | 328 if urlparse(request.url).path.startswith('/web-page-replay'): |
347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, | 329 wpr_command_colliding_urls.add(request.url) |
348 'Distinct resource requests to WPR') | 330 elif request.is_served is False: |
331 unserved_wpr_urls.add(request.url) | |
332 all_wpr_urls.add(request.url) | |
349 | 333 |
350 | 334 _PrintUrlSetComparison(set(), unserved_wpr_urls, |
351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): | 335 'Distinct unserved resources from WPR') |
352 """Extracts a list of subresources in runner output directory. | 336 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
353 | 337 'Distinct resources colliding to WPR commands') |
354 Args: | 338 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
355 runner_output_dir: Path of the runner's output directory. | 339 'Distinct resource requests to WPR') |
356 | |
357 Returns: | |
358 [URLs of sub-resources] | |
359 """ | |
360 trace_path = os.path.join( | |
361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME) | |
362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
363 url_set = set() | |
364 for request_event in _FilterOutDataAndIncompleteRequests( | |
365 trace.request_track.GetEvents()): | |
366 url_set.add(request_event.url) | |
367 logging.info('lists %s resources of %s from %s' % \ | |
368 (len(url_set), trace.url, trace_path)) | |
369 return [url for url in url_set] | |
370 | 340 |
371 | 341 |
372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): | 342 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
373 """Validates a cache archive content. | 343 """Validates a cache archive content. |
374 | 344 |
375 Args: | 345 Args: |
376 cache_build_trace_path: Path of the generated trace at the cache build time. | 346 cache_build_trace_path: Path of the generated trace at the cache build time. |
377 cache_archive_path: Cache archive's path to validate. | 347 cache_archive_path: Cache archive's path to validate. |
348 | |
349 Returns: | |
350 { | |
351 'effective_requests': [URLs of all requests], | |
352 'effective_post_requests': [URLs of POST requests], | |
353 'expected_cached_resources': [URLs of resources expected to be cached] | |
pasko 2016/06/03 17:13:46: nit: comma at the end
gabadie 2016/06/06 09:43:15: Done.
354 'successfully_cached': [URLs of cached sub-resources] | |
355 } | |
378 """ | 356 """ |
379 # TODO(gabadie): What's the best way of propagating errors happening in here? | 357 # TODO(gabadie): What's the best way of propagating errors happening in here? |
380 logging.info('lists cached urls from %s' % cache_archive_path) | 358 logging.info('lists cached urls from %s' % cache_archive_path) |
381 with common_util.TemporaryDirectory() as cache_directory: | 359 with common_util.TemporaryDirectory() as cache_directory: |
382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 360 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
383 cache_keys = set( | 361 cache_keys = set( |
384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 362 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 363 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 364 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 365 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
(...skipping 10 matching lines...)
398 expected_cached_requests = effective_requests.difference( | 376 expected_cached_requests = effective_requests.difference( |
399 effective_post_requests) | 377 effective_post_requests) |
400 effective_cache_keys = cache_keys.difference( | 378 effective_cache_keys = cache_keys.difference( |
401 upload_data_stream_cache_entry_keys) | 379 upload_data_stream_cache_entry_keys) |
402 | 380 |
403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, | 381 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
404 'POST resources') | 382 'POST resources') |
405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, | 383 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
406 'Cached resources') | 384 'Cached resources') |
407 | 385 |
386 return { | |
387 'effective_requests': [url for url in effective_requests], | |
388 'effective_post_requests': [url for url in effective_post_requests], | |
389 'expected_cached_resources': [url for url in expected_cached_requests], | |
390 'successfully_cached_resources': [url for url in effective_cache_keys] | |
391 } | |
392 | |
393 | |
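The dictionary returned by `_ValidateCacheArchiveContent` is what the `ValidatePatchedCache` task later serializes to `common/patched-cache-validation.json`; a hypothetical instance (URLs invented) just to show the shape:

```python
# Hypothetical content of common/patched-cache-validation.json:
{
  'effective_requests': ['https://example.com/', 'https://example.com/app.js'],
  'effective_post_requests': [],
  'expected_cached_resources': ['https://example.com/app.js'],
  'successfully_cached_resources': ['https://example.com/app.js'],
}
```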
394 def _ProcessRunOutputDir( | |
395 cache_validation_result, benchmark_setup, runner_output_dir): | |
396 """Process benchmark's run output directory. | |
397 | |
398 Args: | |
399 cache_validation_result: Same as for _RunOutputVerifier | |
400 benchmark_setup: Same as for _RunOutputVerifier | |
401 runner_output_dir: Same as for SandwichRunner.output_dir | |
402 | |
403 Returns: | |
404 List of dictionary. | |
405 """ | |
406 run_metrics_list = [] | |
407 run_output_verifier = _RunOutputVerifier( | |
408 cache_validation_result, benchmark_setup) | |
409 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | |
410 runner_output_dir): | |
411 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
412 | |
413 logging.info('loading trace: %s', trace_path) | |
414 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
415 | |
416 logging.info('verifying trace: %s', trace_path) | |
417 run_output_verifier.VerifyTrace(trace) | |
418 | |
419 logging.info('extracting metrics from trace: %s', trace_path) | |
420 run_metrics = { | |
421 'url': trace.url, | |
422 'repeat_id': repeat_id, | |
423 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
424 'cache_recording.subresource_count': | |
425 len(cache_validation_result['effective_requests']), | |
426 'cache_recording.cached_subresource_count_theoretic': | |
427 len(cache_validation_result['successfully_cached_resources']), | |
428 'cache_recording.cached_subresource_count': | |
429 len(cache_validation_result['expected_cached_resources']), | |
430 'benchmark_repeat.subresource_count': len(_ListUrlRequests( | |
431 trace, _RequestOutcome.All)), | |
432 'benchmark_repeat.served_from_cache_count_theoretic': | |
433 len(benchmark_setup['cache_whitelist']), | |
434 'benchmark_repeat.served_from_cache_count': len(_ListUrlRequests( | |
435 trace, _RequestOutcome.ServedFromCache)), | |
436 } | |
437 run_metrics.update( | |
438 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
439 repeat_dir, trace)) | |
440 run_metrics_list.append(run_metrics) | |
441 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
442 | |
443 wpr_log_path = os.path.join( | |
444 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) | |
445 logging.info('verifying wpr log: %s', wpr_log_path) | |
446 run_output_verifier.VerifyWprLog(wpr_log_path) | |
447 return run_metrics_list | |
448 | |
408 | 449 |
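Outside the task graph, the new `_ProcessRunOutputDir` helper would be exercised roughly as follows (the file and directory names are hypothetical; the real call sits in the `ProcessRunOutputDir` task further down):

```python
# Sketch: verify one run output directory and pull per-repeat metrics.
benchmark_setup = json.load(open('common/parser-setup.json'))
cache_validation_result = json.load(
    open('common/patched-cache-validation.json'))
run_metrics_list = _ProcessRunOutputDir(
    cache_validation_result, benchmark_setup, 'no-tweaks/parser-run/')
for run_metrics in run_metrics_list:
  logging.info('repeat %d: %d requests served from cache',
               run_metrics['repeat_id'],
               run_metrics['benchmark_repeat.served_from_cache_count'])
```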
409 class PrefetchBenchmarkBuilder(task_manager.Builder): | 450 class PrefetchBenchmarkBuilder(task_manager.Builder): |
410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" | 451 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
411 | 452 |
412 def __init__(self, common_builder): | 453 def __init__(self, common_builder): |
413 task_manager.Builder.__init__(self, | 454 task_manager.Builder.__init__(self, |
414 common_builder.output_directory, | 455 common_builder.output_directory, |
415 common_builder.output_subdirectory) | 456 common_builder.output_subdirectory) |
416 self._common_builder = common_builder | 457 self._common_builder = common_builder |
417 | 458 |
418 self._patched_wpr_task = None | 459 self._patched_wpr_task = None |
419 self._reference_cache_task = None | 460 self._cache_task = None |
420 self._trace_from_grabbing_reference_cache = None | 461 self._trace_from_grabbing_reference_cache = None |
421 self._subresources_for_urls_task = None | 462 self._cache_validation_task = None |
pasko 2016/06/03 17:13:46: I would prefer to just keep a path here than the w
gabadie 2016/06/06 09:43:14: Done.
422 self._PopulateCommonPipelines() | 463 self._PopulateCommonPipelines() |
423 | 464 |
424 def _PopulateCommonPipelines(self): | 465 def _PopulateCommonPipelines(self): |
425 """Creates necessary tasks to produce initial cache archive. | 466 """Creates necessary tasks to produce initial cache archive. |
426 | 467 |
427 Also creates a task for producing a json file with a mapping of URLs to | 468 Also creates a task for producing a json file with a mapping of URLs to |
428 subresources (urls-resources.json). | 469 subresources (urls-resources.json). |
429 | 470 |
430 Here is the full dependency tree for the returned task: | 471 Here is the full dependency tree for the returned task: |
431 common/patched-cache-validation.log | 472 common/patched-cache-validation.json |
pasko 2016/06/03 17:13:46: probably should rename later to something like pat
gabadie 2016/06/06 09:43:14: Ok will make it in a separate CL.
432 depends on: common/patched-cache.zip | 473 depends on: common/patched-cache.zip |
433 depends on: common/original-cache.zip | 474 depends on: common/original-cache.zip |
434 depends on: common/webpages-patched.wpr | 475 depends on: common/webpages-patched.wpr |
435 depends on: common/webpages.wpr | 476 depends on: common/webpages.wpr |
436 depends on: common/urls-resources.json | |
437 depends on: common/original-cache.zip | |
438 """ | 477 """ |
439 @self.RegisterTask('common/webpages-patched.wpr', | 478 @self.RegisterTask('common/webpages-patched.wpr', |
440 dependencies=[self._common_builder.original_wpr_task]) | 479 dependencies=[self._common_builder.original_wpr_task]) |
441 def BuildPatchedWpr(): | 480 def BuildPatchedWpr(): |
442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 481 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
443 shutil.copyfile( | 482 shutil.copyfile( |
444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 483 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
445 _PatchWpr(BuildPatchedWpr.path) | 484 _PatchWpr(BuildPatchedWpr.path) |
446 | 485 |
447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 486 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
448 def BuildOriginalCache(): | 487 def BuildOriginalCache(): |
449 runner = self._common_builder.CreateSandwichRunner() | 488 runner = self._common_builder.CreateSandwichRunner() |
450 runner.wpr_archive_path = BuildPatchedWpr.path | 489 runner.wpr_archive_path = BuildPatchedWpr.path |
451 runner.cache_archive_path = BuildOriginalCache.path | 490 runner.cache_archive_path = BuildOriginalCache.path |
452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE | 491 runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
453 runner.output_dir = BuildOriginalCache.run_path | 492 runner.output_dir = BuildOriginalCache.run_path |
454 runner.Run() | 493 runner.Run() |
455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' | 494 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
456 original_cache_trace_path = os.path.join( | 495 original_cache_trace_path = os.path.join( |
457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) | 496 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
458 | 497 |
459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) | 498 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
460 def BuildPatchedCache(): | 499 def BuildPatchedCache(): |
461 _PatchCacheArchive(BuildOriginalCache.path, | 500 _PatchCacheArchive(BuildOriginalCache.path, |
462 original_cache_trace_path, BuildPatchedCache.path) | 501 original_cache_trace_path, BuildPatchedCache.path) |
463 | 502 |
464 @self.RegisterTask('common/subresources-for-urls.json', | 503 @self.RegisterTask('common/patched-cache-validation.json', |
465 [BuildOriginalCache]) | |
466 def ListUrlsResources(): | |
467 url_resources = _ReadSubresourceFromRunnerOutputDir( | |
468 BuildOriginalCache.run_path) | |
469 with open(ListUrlsResources.path, 'w') as output: | |
470 json.dump(url_resources, output) | |
471 | |
472 @self.RegisterTask('common/patched-cache-validation.log', | |
473 [BuildPatchedCache]) | 504 [BuildPatchedCache]) |
474 def ValidatePatchedCache(): | 505 def ValidatePatchedCache(): |
475 handler = logging.FileHandler(ValidatePatchedCache.path) | 506 cache_validation_result = _ValidateCacheArchiveContent( |
476 logging.getLogger().addHandler(handler) | 507 original_cache_trace_path, BuildPatchedCache.path) |
477 try: | 508 with open(ValidatePatchedCache.path, 'w') as output: |
478 _ValidateCacheArchiveContent( | 509 json.dump(cache_validation_result, output) |
479 original_cache_trace_path, BuildPatchedCache.path) | |
480 finally: | |
481 logging.getLogger().removeHandler(handler) | |
482 | 510 |
483 self._patched_wpr_task = BuildPatchedWpr | 511 self._patched_wpr_task = BuildPatchedWpr |
484 self._trace_from_grabbing_reference_cache = original_cache_trace_path | 512 self._trace_from_grabbing_reference_cache = original_cache_trace_path |
485 self._reference_cache_task = BuildPatchedCache | 513 self._cache_task = BuildPatchedCache |
486 self._subresources_for_urls_task = ListUrlsResources | 514 self._cache_validation_task = ValidatePatchedCache |
487 | 515 |
488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) | 516 self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
489 | 517 |
490 def PopulateLoadBenchmark(self, subresource_discoverer, | 518 def PopulateLoadBenchmark(self, subresource_discoverer, |
491 transformer_list_name, transformer_list): | 519 transformer_list_name, transformer_list): |
492 """Populate benchmarking tasks from its setup tasks. | 520 """Populate benchmarking tasks from its setup tasks. |
493 | 521 |
494 Args: | 522 Args: |
495 subresource_discoverer: Name of a subresources discoverer. | 523 subresource_discoverer: Name of a subresources discoverer. |
496 transformer_list_name: A string describing the transformers, will be used | 524 transformer_list_name: A string describing the transformers, will be used |
497 in Task names (prefer names without spaces and special characters). | 525 in Task names (prefer names without spaces and special characters). |
498 transformer_list: An ordered list of function that takes an instance of | 526 transformer_list: An ordered list of function that takes an instance of |
499 SandwichRunner as parameter, would be applied immediately before | 527 SandwichRunner as parameter, would be applied immediately before |
500 SandwichRunner.Run() in the given order. | 528 SandwichRunner.Run() in the given order. |
501 | 529 |
502 Here is the full dependency of the added tree for the returned task: | 530 Here is the full dependency of the added tree for the returned task: |
503 <transformer_list_name>/<subresource_discoverer>-metrics.csv | 531 <transformer_list_name>/<subresource_discoverer>-metrics.csv |
504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ | 532 depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
505 depends on: common/<subresource_discoverer>-cache.zip | 533 depends on: common/<subresource_discoverer>-cache.zip |
506 depends on: some tasks saved by PopulateCommonPipelines() | 534 depends on: some tasks saved by PopulateCommonPipelines() |
pasko 2016/06/03 17:13:46: only depends on -setup.json, right?
gabadie 2016/06/06 09:43:14: Done.
507 depends on: common/<subresource_discoverer>-setup.json | 535 depends on: common/<subresource_discoverer>-setup.json |
508 depends on: some tasks saved by PopulateCommonPipelines() | 536 depends on: some tasks saved by PopulateCommonPipelines() |
pasko 2016/06/03 17:13:46: some? should it just say common/patched-cache-vali
gabadie 2016/06/06 09:43:14: Done.
509 """ | 537 """ |
510 additional_column_names = [ | 538 additional_column_names = [ |
511 'url', | 539 'url', |
512 'repeat_id', | 540 'repeat_id', |
513 'subresource_discoverer', | 541 'subresource_discoverer', |
514 'subresource_count', | 542 'cache_recording.subresource_count', |
515 # The amount of subresources detected at SetupBenchmark step. | 543 'cache_recording.cached_subresource_count_theoretic', |
516 'subresource_count_theoretic', | 544 'cache_recording.cached_subresource_count', |
517 # Amount of subresources for caching as suggested by the subresource | 545 'benchmark_repeat.subresource_count', |
pasko 2016/06/03 17:13:46: 'benchmark_repeat' sounds more like a command than
gabadie 2016/06/06 09:43:15: Done.
518 # discoverer. | 546 'benchmark_repeat.served_from_cache_count_theoretic', |
519 'cached_subresource_count_theoretic', | 547 'benchmark_repeat.served_from_cache_count'] |
520 'cached_subresource_count'] | |
521 | 548 |
522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS | 549 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
523 assert 'common' not in SUBRESOURCE_DISCOVERERS | 550 assert 'common' not in SUBRESOURCE_DISCOVERERS |
524 shared_task_prefix = os.path.join('common', subresource_discoverer) | 551 shared_task_prefix = os.path.join('common', subresource_discoverer) |
525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) | 552 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
526 | 553 |
527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, | 554 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
528 dependencies=[self._subresources_for_urls_task]) | 555 dependencies=[self._cache_validation_task]) |
529 def SetupBenchmark(): | 556 def SetupBenchmark(): |
530 whitelisted_urls = _ExtractDiscoverableUrls( | 557 whitelisted_urls = _ExtractDiscoverableUrls( |
531 self._trace_from_grabbing_reference_cache, subresource_discoverer) | 558 self._trace_from_grabbing_reference_cache, subresource_discoverer) |
532 | 559 |
533 url_resources = json.load(open(self._subresources_for_urls_task.path)) | |
534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) | 560 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
535 with open(SetupBenchmark.path, 'w') as output: | 561 with open(SetupBenchmark.path, 'w') as output: |
536 json.dump({ | 562 json.dump({ |
537 'cache_whitelist': [url for url in whitelisted_urls], | 563 'cache_whitelist': [url for url in whitelisted_urls], |
538 'subresource_discoverer': subresource_discoverer, | 564 'subresource_discoverer': subresource_discoverer, |
539 'url_resources': url_resources, | |
540 }, output) | 565 }, output) |
541 | 566 |
542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, | 567 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
543 dependencies=[ | 568 dependencies=[SetupBenchmark]) |
544 SetupBenchmark, self._reference_cache_task]) | |
545 def BuildBenchmarkCacheArchive(): | 569 def BuildBenchmarkCacheArchive(): |
546 setup = json.load(open(SetupBenchmark.path)) | 570 benchmark_setup = json.load(open(SetupBenchmark.path)) |
547 chrome_cache.ApplyUrlWhitelistToCacheArchive( | 571 chrome_cache.ApplyUrlWhitelistToCacheArchive( |
548 cache_archive_path=self._reference_cache_task.path, | 572 cache_archive_path=self._cache_task.path, |
549 whitelisted_urls=setup['cache_whitelist'], | 573 whitelisted_urls=benchmark_setup['cache_whitelist'], |
550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) | 574 output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
551 | 575 |
552 @self.RegisterTask(task_prefix + '-run/', | 576 @self.RegisterTask(task_prefix + '-run/', |
553 dependencies=[BuildBenchmarkCacheArchive]) | 577 dependencies=[BuildBenchmarkCacheArchive]) |
554 def RunBenchmark(): | 578 def RunBenchmark(): |
555 runner = self._common_builder.CreateSandwichRunner() | 579 runner = self._common_builder.CreateSandwichRunner() |
556 for transformer in transformer_list: | 580 for transformer in transformer_list: |
557 transformer(runner) | 581 transformer(runner) |
558 runner.wpr_archive_path = self._patched_wpr_task.path | 582 runner.wpr_archive_path = self._patched_wpr_task.path |
559 runner.wpr_out_log_path = os.path.join( | 583 runner.wpr_out_log_path = os.path.join( |
560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) | 584 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path | 585 runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH | 586 runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
563 runner.output_dir = RunBenchmark.path | 587 runner.output_dir = RunBenchmark.path |
564 runner.Run() | 588 runner.Run() |
565 | 589 |
566 @self.RegisterTask(task_prefix + '-metrics.csv', | 590 @self.RegisterTask(task_prefix + '-metrics.csv', |
567 dependencies=[RunBenchmark]) | 591 dependencies=[RunBenchmark]) |
568 def ExtractMetrics(): | 592 def ProcessRunOutputDir(): |
569 # TODO(gabadie): Performance improvement: load each trace only once and | 593 benchmark_setup = json.load(open(SetupBenchmark.path)) |
570 # use it for validation and extraction of metrics later. | 594 cache_validation_result = json.load( |
571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) | 595 open(self._cache_validation_task.path)) |
572 | 596 |
573 benchmark_setup = json.load(open(SetupBenchmark.path)) | 597 run_metrics_list = _ProcessRunOutputDir( |
574 run_metrics_list = [] | 598 cache_validation_result, benchmark_setup, RunBenchmark.path) |
575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | 599 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
576 RunBenchmark.path): | |
577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
578 logging.info('processing trace: %s', trace_path) | |
579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
580 run_metrics = { | |
581 'url': trace.url, | |
582 'repeat_id': repeat_id, | |
583 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
584 'subresource_count': len(_ListUrlRequests( | |
585 trace, _RequestOutcome.All)), | |
586 'subresource_count_theoretic': | |
587 len(benchmark_setup['url_resources']), | |
588 'cached_subresource_count': len(_ListUrlRequests( | |
589 trace, _RequestOutcome.ServedFromCache)), | |
590 'cached_subresource_count_theoretic': | |
591 len(benchmark_setup['cache_whitelist']), | |
592 } | |
593 run_metrics.update( | |
594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
595 repeat_dir, trace)) | |
596 run_metrics_list.append(run_metrics) | |
597 | |
598 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
599 with open(ExtractMetrics.path, 'w') as csv_file: | |
600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
602 writer.writeheader() | 602 writer.writeheader() |
603 for trace_metrics in run_metrics_list: | 603 for trace_metrics in run_metrics_list: |
604 writer.writerow(trace_metrics) | 604 writer.writerow(trace_metrics) |
605 | 605 |
606 self._common_builder.default_final_tasks.append(ExtractMetrics) | 606 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
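Taken together, a client of this module wires the builder up roughly like this (the `common_builder` instance comes from the surrounding sandwich task framework, and the transformer list name is invented):

```python
# Sketch: build the prefetch benchmark task graph for one discoverer.
builder = PrefetchBenchmarkBuilder(common_builder)
builder.PopulateLoadBenchmark(
    subresource_discoverer=PARSER_DISCOVERER,
    transformer_list_name='no-network-emulation',
    transformer_list=[])
```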