Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import csv | 5 import csv |
|
pasko
2016/06/03 17:13:46
Let's add a top-level comment like:
# Implements a
gabadie
2016/06/06 09:43:15
Done.
| |
| 6 import logging | 6 import logging |
| 7 import json | 7 import json |
| 8 import os | 8 import os |
| 9 import re | 9 import re |
| 10 import shutil | 10 import shutil |
| 11 from urlparse import urlparse | 11 from urlparse import urlparse |
| 12 | 12 |
| 13 import chrome_cache | 13 import chrome_cache |
| 14 import common_util | 14 import common_util |
| 15 import loading_trace | 15 import loading_trace |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 191 elif subresource_discoverer == PARSER_DISCOVERER: | 191 elif subresource_discoverer == PARSER_DISCOVERER: |
| 192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
| 193 first_resource_request, dependencies_lens) | 193 first_resource_request, dependencies_lens) |
| 194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
| 195 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 195 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
| 196 first_resource_request, dependencies_lens, trace) | 196 first_resource_request, dependencies_lens, trace) |
| 197 else: | 197 else: |
| 198 assert False | 198 assert False |
| 199 | 199 |
| 200 whitelisted_urls = set() | 200 whitelisted_urls = set() |
| 201 logging.info('white-listing %s' % first_resource_request.url) | |
| 202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): | 201 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
| 203 logging.info('white-listing %s' % request.url) | 202 logging.debug('white-listing %s', request.url) |
| 204 whitelisted_urls.add(request.url) | 203 whitelisted_urls.add(request.url) |
| 204 logging.info('number of white-listed resources: %d', len(whitelisted_urls)) | |
| 205 return whitelisted_urls | 205 return whitelisted_urls |
| 206 | 206 |
| 207 | 207 |
| 208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | 208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
| 209 """Compare URL sets and log the diffs. | 209 """Compare URL sets and log the diffs. |
| 210 | 210 |
| 211 Args: | 211 Args: |
| 212 ref_url_set: Set of reference urls. | 212 ref_url_set: Set of reference urls. |
| 213 url_set: Set of urls to compare to the reference. | 213 url_set: Set of urls to compare to the reference. |
| 214 url_set_name: The set name for logging purposes. | 214 url_set_name: The set name for logging purposes. |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 254 request_event.method.upper().strip() == 'POST'): | 254 request_event.method.upper().strip() == 'POST'): |
| 255 urls.add(request_event.url) | 255 urls.add(request_event.url) |
| 256 elif (request_kind == _RequestOutcome.NotServedFromCache and | 256 elif (request_kind == _RequestOutcome.NotServedFromCache and |
| 257 not request_event.from_disk_cache): | 257 not request_event.from_disk_cache): |
| 258 urls.add(request_event.url) | 258 urls.add(request_event.url) |
| 259 elif request_kind == _RequestOutcome.All: | 259 elif request_kind == _RequestOutcome.All: |
| 260 urls.add(request_event.url) | 260 urls.add(request_event.url) |
| 261 return urls | 261 return urls |
| 262 | 262 |
| 263 | 263 |
| 264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, | 264 class _RunOutputVerifier(object): |
| 265 benchmark_output_directory_path): | 265 """Object to verify benchmark run from traces and WPR log stored in the |
| 266 """Verifies that all run inside the run_output_directory worked as expected. | 266 runner output directory. |
| 267 """ | |
| 267 | 268 |
| 268 Args: | 269 def __init__(self, cache_validation_result, benchmark_setup): |
| 269 benchmark_setup_path: Path of the JSON of the benchmark setup. | 270 """Constructor. |
| 270 benchmark_output_directory_path: Path of the benchmark output directory to | |
| 271 verify. | |
| 272 """ | |
| 273 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
| 274 benchmark_setup = json.load(open(benchmark_setup_path)) | |
| 275 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
| 276 original_requests = set(benchmark_setup['url_resources']) | |
| 277 original_cached_requests = original_requests.intersection(cache_whitelist) | |
| 278 original_uncached_requests = original_requests.difference(cache_whitelist) | |
| 279 all_sent_url_requests = set() | |
| 280 | 271 |
| 281 # Verify requests from traces. | 272 Args: |
| 282 run_id = -1 | 273 cache_validation_result: JSON of the cache validation task. |
| 283 while True: | 274 benchmark_setup: JSON of the benchmark setup. |
| 284 run_id += 1 | 275 """ |
| 285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | 276 self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
| 286 if not os.path.isdir(run_path): | 277 self._original_requests = set(cache_validation_result['effective_requests']) |
| 287 break | 278 self._original_post_requests = set( |
| 288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | 279 cache_validation_result['effective_post_requests']) |
| 289 if not os.path.isfile(trace_path): | 280 self._original_cached_requests = self._original_requests.intersection( |
| 290 logging.error('missing trace %s' % trace_path) | 281 self._cache_whitelist) |
| 291 continue | 282 self._original_uncached_requests = self._original_requests.difference( |
| 292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 283 self._cache_whitelist) |
| 293 logging.info('verifying %s from %s' % (trace.url, trace_path)) | 284 self._all_sent_url_requests = set() |
| 294 | 285 |
| 286 def VerifyTrace(self, trace): | |
| 287 """Verifies a trace with the cache validation result and the benchmark | |
| 288 setup. | |
| 289 """ | |
| 295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 290 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| 296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 291 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| 297 effective_cached_requests = \ | 292 effective_cached_requests = \ |
| 298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) | 293 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) |
| 299 effective_uncached_requests = \ | 294 effective_uncached_requests = \ |
| 300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) | 295 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) |
| 301 | 296 |
| 302 missing_requests = original_requests.difference(effective_requests) | 297 missing_requests = self._original_requests.difference(effective_requests) |
| 303 unexpected_requests = effective_requests.difference(original_requests) | 298 unexpected_requests = effective_requests.difference(self._original_requests) |
| 304 expected_cached_requests = \ | 299 expected_cached_requests = \ |
| 305 original_cached_requests.difference(missing_requests) | 300 self._original_cached_requests.difference(missing_requests) |
| 306 missing_cached_requests = \ | 301 expected_uncached_requests = self._original_uncached_requests.union( |
| 307 expected_cached_requests.difference(effective_cached_requests) | 302 unexpected_requests).difference(missing_requests) |
| 308 expected_uncached_requests = original_uncached_requests.union( | |
| 309 unexpected_requests).union(missing_cached_requests) | |
| 310 all_sent_url_requests.update(effective_uncached_requests) | |
| 311 | 303 |
| 312 # POST requests are known to be unable to use the cache. | 304 # POST requests are known to be unable to use the cache. |
| 313 expected_cached_requests.difference_update(effective_post_requests) | 305 expected_cached_requests.difference_update(effective_post_requests) |
| 314 expected_uncached_requests.update(effective_post_requests) | 306 expected_uncached_requests.update(effective_post_requests) |
| 315 | 307 |
| 316 _PrintUrlSetComparison(original_requests, effective_requests, | 308 _PrintUrlSetComparison(self._original_requests, effective_requests, |
| 317 'All resources') | 309 'All resources') |
| 318 _PrintUrlSetComparison(set(), effective_post_requests, | 310 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
| 319 'POST resources') | |
| 320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, | 311 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
| 321 'Cached resources') | 312 'Cached resources') |
| 322 _PrintUrlSetComparison(expected_uncached_requests, | 313 _PrintUrlSetComparison(expected_uncached_requests, |
| 323 effective_uncached_requests, 'Non cached resources') | 314 effective_uncached_requests, 'Non cached resources') |
| 324 | 315 |
| 325 # Verify requests from WPR. | 316 self._all_sent_url_requests.update(effective_uncached_requests) |
| 326 wpr_log_path = os.path.join( | |
| 327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME) | |
| 328 logging.info('verifying requests from %s' % wpr_log_path) | |
| 329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) | |
| 330 all_wpr_urls = set() | |
| 331 unserved_wpr_urls = set() | |
| 332 wpr_command_colliding_urls = set() | |
| 333 | 317 |
| 334 for request in all_wpr_requests: | 318 def VerifyWprLog(self, wpr_log_path): |
| 335 if request.is_wpr_host: | 319 """Verifies WPR log with previously verified traces.""" |
| 336 continue | 320 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
| 337 if urlparse(request.url).path.startswith('/web-page-replay'): | 321 all_wpr_urls = set() |
| 338 wpr_command_colliding_urls.add(request.url) | 322 unserved_wpr_urls = set() |
| 339 elif request.is_served is False: | 323 wpr_command_colliding_urls = set() |
| 340 unserved_wpr_urls.add(request.url) | |
| 341 all_wpr_urls.add(request.url) | |
| 342 | 324 |
| 343 _PrintUrlSetComparison(set(), unserved_wpr_urls, | 325 for request in all_wpr_requests: |
| 344 'Distinct unserved resources from WPR') | 326 if request.is_wpr_host: |
| 345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, | 327 continue |
| 346 'Distinct resources colliding to WPR commands') | 328 if urlparse(request.url).path.startswith('/web-page-replay'): |
| 347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, | 329 wpr_command_colliding_urls.add(request.url) |
| 348 'Distinct resource requests to WPR') | 330 elif request.is_served is False: |
| 331 unserved_wpr_urls.add(request.url) | |
| 332 all_wpr_urls.add(request.url) | |
| 349 | 333 |
| 350 | 334 _PrintUrlSetComparison(set(), unserved_wpr_urls, |
| 351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): | 335 'Distinct unserved resources from WPR') |
| 352 """Extracts a list of subresources in runner output directory. | 336 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
| 353 | 337 'Distinct resources colliding to WPR commands') |
| 354 Args: | 338 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
| 355 runner_output_dir: Path of the runner's output directory. | 339 'Distinct resource requests to WPR') |
| 356 | |
| 357 Returns: | |
| 358 [URLs of sub-resources] | |
| 359 """ | |
| 360 trace_path = os.path.join( | |
| 361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME) | |
| 362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
| 363 url_set = set() | |
| 364 for request_event in _FilterOutDataAndIncompleteRequests( | |
| 365 trace.request_track.GetEvents()): | |
| 366 url_set.add(request_event.url) | |
| 367 logging.info('lists %s resources of %s from %s' % \ | |
| 368 (len(url_set), trace.url, trace_path)) | |
| 369 return [url for url in url_set] | |
| 370 | 340 |
| 371 | 341 |
| 372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): | 342 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
| 373 """Validates a cache archive content. | 343 """Validates a cache archive content. |
| 374 | 344 |
| 375 Args: | 345 Args: |
| 376 cache_build_trace_path: Path of the generated trace at the cache build time. | 346 cache_build_trace_path: Path of the generated trace at the cache build time. |
| 377 cache_archive_path: Cache archive's path to validate. | 347 cache_archive_path: Cache archive's path to validate. |
| 348 | |
| 349 Returns: | |
| 350 { | |
| 351 'effective_requests': [URLs of all requests], | |
| 352 'effective_post_requests': [URLs of POST requests], | |
| 353 'expected_cached_resources': [URLs of resources expected to be cached] | |
|
pasko
2016/06/03 17:13:46
nit: comma at the end
gabadie
2016/06/06 09:43:15
Done.
| |
| 354 'successfully_cached': [URLs of cached sub-resources] | |
| 355 } | |
| 378 """ | 356 """ |
| 379 # TODO(gabadie): What's the best way of propagating errors happening in here? | 357 # TODO(gabadie): What's the best way of propagating errors happening in here? |
| 380 logging.info('lists cached urls from %s' % cache_archive_path) | 358 logging.info('lists cached urls from %s' % cache_archive_path) |
| 381 with common_util.TemporaryDirectory() as cache_directory: | 359 with common_util.TemporaryDirectory() as cache_directory: |
| 382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 360 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
| 383 cache_keys = set( | 361 cache_keys = set( |
| 384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 362 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
| 385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 363 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
| 386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 364 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
| 387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 365 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 398 expected_cached_requests = effective_requests.difference( | 376 expected_cached_requests = effective_requests.difference( |
| 399 effective_post_requests) | 377 effective_post_requests) |
| 400 effective_cache_keys = cache_keys.difference( | 378 effective_cache_keys = cache_keys.difference( |
| 401 upload_data_stream_cache_entry_keys) | 379 upload_data_stream_cache_entry_keys) |
| 402 | 380 |
| 403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, | 381 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
| 404 'POST resources') | 382 'POST resources') |
| 405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, | 383 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
| 406 'Cached resources') | 384 'Cached resources') |
| 407 | 385 |
| 386 return { | |
| 387 'effective_requests': [url for url in effective_requests], | |
| 388 'effective_post_requests': [url for url in effective_post_requests], | |
| 389 'expected_cached_resources': [url for url in expected_cached_requests], | |
| 390 'successfully_cached_resources': [url for url in effective_cache_keys] | |
| 391 } | |
| 392 | |
| 393 | |
| 394 def _ProcessRunOutputDir( | |
| 395 cache_validation_result, benchmark_setup, runner_output_dir): | |
| 396 """Process benchmark's run output directory. | |
| 397 | |
| 398 Args: | |
| 399 cache_validation_result: Same as for _RunOutputVerifier | |
| 400 benchmark_setup: Same as for _RunOutputVerifier | |
| 401 runner_output_dir: Same as for SandwichRunner.output_dir | |
| 402 | |
| 403 Returns: | |
| 404 List of dictionary. | |
| 405 """ | |
| 406 run_metrics_list = [] | |
| 407 run_output_verifier = _RunOutputVerifier( | |
| 408 cache_validation_result, benchmark_setup) | |
| 409 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | |
| 410 runner_output_dir): | |
| 411 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
| 412 | |
| 413 logging.info('loading trace: %s', trace_path) | |
| 414 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
| 415 | |
| 416 logging.info('verifying trace: %s', trace_path) | |
| 417 run_output_verifier.VerifyTrace(trace) | |
| 418 | |
| 419 logging.info('extracting metrics from trace: %s', trace_path) | |
| 420 run_metrics = { | |
| 421 'url': trace.url, | |
| 422 'repeat_id': repeat_id, | |
| 423 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
| 424 'cache_recording.subresource_count': | |
| 425 len(cache_validation_result['effective_requests']), | |
| 426 'cache_recording.cached_subresource_count_theoretic': | |
| 427 len(cache_validation_result['successfully_cached_resources']), | |
| 428 'cache_recording.cached_subresource_count': | |
| 429 len(cache_validation_result['expected_cached_resources']), | |
| 430 'benchmark_repeat.subresource_count': len(_ListUrlRequests( | |
| 431 trace, _RequestOutcome.All)), | |
| 432 'benchmark_repeat.served_from_cache_count_theoretic': | |
| 433 len(benchmark_setup['cache_whitelist']), | |
| 434 'benchmark_repeat.served_from_cache_count': len(_ListUrlRequests( | |
| 435 trace, _RequestOutcome.ServedFromCache)), | |
| 436 } | |
| 437 run_metrics.update( | |
| 438 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
| 439 repeat_dir, trace)) | |
| 440 run_metrics_list.append(run_metrics) | |
| 441 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
| 442 | |
| 443 wpr_log_path = os.path.join( | |
| 444 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) | |
| 445 logging.info('verifying wpr log: %s', wpr_log_path) | |
| 446 run_output_verifier.VerifyWprLog(wpr_log_path) | |
| 447 return run_metrics_list | |
| 448 | |
| 408 | 449 |
| 409 class PrefetchBenchmarkBuilder(task_manager.Builder): | 450 class PrefetchBenchmarkBuilder(task_manager.Builder): |
| 410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" | 451 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
| 411 | 452 |
| 412 def __init__(self, common_builder): | 453 def __init__(self, common_builder): |
| 413 task_manager.Builder.__init__(self, | 454 task_manager.Builder.__init__(self, |
| 414 common_builder.output_directory, | 455 common_builder.output_directory, |
| 415 common_builder.output_subdirectory) | 456 common_builder.output_subdirectory) |
| 416 self._common_builder = common_builder | 457 self._common_builder = common_builder |
| 417 | 458 |
| 418 self._patched_wpr_task = None | 459 self._patched_wpr_task = None |
| 419 self._reference_cache_task = None | 460 self._cache_task = None |
| 420 self._trace_from_grabbing_reference_cache = None | 461 self._trace_from_grabbing_reference_cache = None |
| 421 self._subresources_for_urls_task = None | 462 self._cache_validation_task = None |
|
pasko
2016/06/03 17:13:46
I would prefer to just keep a path here than the whole task.
gabadie
2016/06/06 09:43:14
Done.
| |
| 422 self._PopulateCommonPipelines() | 463 self._PopulateCommonPipelines() |
| 423 | 464 |
| 424 def _PopulateCommonPipelines(self): | 465 def _PopulateCommonPipelines(self): |
| 425 """Creates necessary tasks to produce initial cache archive. | 466 """Creates necessary tasks to produce initial cache archive. |
| 426 | 467 |
| 427 Also creates a task for producing a json file with a mapping of URLs to | 468 Also creates a task for producing a json file with a mapping of URLs to |
| 428 subresources (urls-resources.json). | 469 subresources (urls-resources.json). |
| 429 | 470 |
| 430 Here is the full dependency tree for the returned task: | 471 Here is the full dependency tree for the returned task: |
| 431 common/patched-cache-validation.log | 472 common/patched-cache-validation.json |
|
pasko
2016/06/03 17:13:46
probably should rename later to something like pat
gabadie
2016/06/06 09:43:14
Ok will make it in a separate CL.
| |
| 432 depends on: common/patched-cache.zip | 473 depends on: common/patched-cache.zip |
| 433 depends on: common/original-cache.zip | 474 depends on: common/original-cache.zip |
| 434 depends on: common/webpages-patched.wpr | 475 depends on: common/webpages-patched.wpr |
| 435 depends on: common/webpages.wpr | 476 depends on: common/webpages.wpr |
| 436 depends on: common/urls-resources.json | |
| 437 depends on: common/original-cache.zip | |
| 438 """ | 477 """ |
| 439 @self.RegisterTask('common/webpages-patched.wpr', | 478 @self.RegisterTask('common/webpages-patched.wpr', |
| 440 dependencies=[self._common_builder.original_wpr_task]) | 479 dependencies=[self._common_builder.original_wpr_task]) |
| 441 def BuildPatchedWpr(): | 480 def BuildPatchedWpr(): |
| 442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 481 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
| 443 shutil.copyfile( | 482 shutil.copyfile( |
| 444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 483 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
| 445 _PatchWpr(BuildPatchedWpr.path) | 484 _PatchWpr(BuildPatchedWpr.path) |
| 446 | 485 |
| 447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 486 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
| 448 def BuildOriginalCache(): | 487 def BuildOriginalCache(): |
| 449 runner = self._common_builder.CreateSandwichRunner() | 488 runner = self._common_builder.CreateSandwichRunner() |
| 450 runner.wpr_archive_path = BuildPatchedWpr.path | 489 runner.wpr_archive_path = BuildPatchedWpr.path |
| 451 runner.cache_archive_path = BuildOriginalCache.path | 490 runner.cache_archive_path = BuildOriginalCache.path |
| 452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE | 491 runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
| 453 runner.output_dir = BuildOriginalCache.run_path | 492 runner.output_dir = BuildOriginalCache.run_path |
| 454 runner.Run() | 493 runner.Run() |
| 455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' | 494 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
| 456 original_cache_trace_path = os.path.join( | 495 original_cache_trace_path = os.path.join( |
| 457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) | 496 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
| 458 | 497 |
| 459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) | 498 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
| 460 def BuildPatchedCache(): | 499 def BuildPatchedCache(): |
| 461 _PatchCacheArchive(BuildOriginalCache.path, | 500 _PatchCacheArchive(BuildOriginalCache.path, |
| 462 original_cache_trace_path, BuildPatchedCache.path) | 501 original_cache_trace_path, BuildPatchedCache.path) |
| 463 | 502 |
| 464 @self.RegisterTask('common/subresources-for-urls.json', | 503 @self.RegisterTask('common/patched-cache-validation.json', |
| 465 [BuildOriginalCache]) | |
| 466 def ListUrlsResources(): | |
| 467 url_resources = _ReadSubresourceFromRunnerOutputDir( | |
| 468 BuildOriginalCache.run_path) | |
| 469 with open(ListUrlsResources.path, 'w') as output: | |
| 470 json.dump(url_resources, output) | |
| 471 | |
| 472 @self.RegisterTask('common/patched-cache-validation.log', | |
| 473 [BuildPatchedCache]) | 504 [BuildPatchedCache]) |
| 474 def ValidatePatchedCache(): | 505 def ValidatePatchedCache(): |
| 475 handler = logging.FileHandler(ValidatePatchedCache.path) | 506 cache_validation_result = _ValidateCacheArchiveContent( |
| 476 logging.getLogger().addHandler(handler) | 507 original_cache_trace_path, BuildPatchedCache.path) |
| 477 try: | 508 with open(ValidatePatchedCache.path, 'w') as output: |
| 478 _ValidateCacheArchiveContent( | 509 json.dump(cache_validation_result, output) |
| 479 original_cache_trace_path, BuildPatchedCache.path) | |
| 480 finally: | |
| 481 logging.getLogger().removeHandler(handler) | |
| 482 | 510 |
| 483 self._patched_wpr_task = BuildPatchedWpr | 511 self._patched_wpr_task = BuildPatchedWpr |
| 484 self._trace_from_grabbing_reference_cache = original_cache_trace_path | 512 self._trace_from_grabbing_reference_cache = original_cache_trace_path |
| 485 self._reference_cache_task = BuildPatchedCache | 513 self._cache_task = BuildPatchedCache |
| 486 self._subresources_for_urls_task = ListUrlsResources | 514 self._cache_validation_task = ValidatePatchedCache |
| 487 | 515 |
| 488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) | 516 self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
| 489 | 517 |
| 490 def PopulateLoadBenchmark(self, subresource_discoverer, | 518 def PopulateLoadBenchmark(self, subresource_discoverer, |
| 491 transformer_list_name, transformer_list): | 519 transformer_list_name, transformer_list): |
| 492 """Populate benchmarking tasks from its setup tasks. | 520 """Populate benchmarking tasks from its setup tasks. |
| 493 | 521 |
| 494 Args: | 522 Args: |
| 495 subresource_discoverer: Name of a subresources discoverer. | 523 subresource_discoverer: Name of a subresources discoverer. |
| 496 transformer_list_name: A string describing the transformers, will be used | 524 transformer_list_name: A string describing the transformers, will be used |
| 497 in Task names (prefer names without spaces and special characters). | 525 in Task names (prefer names without spaces and special characters). |
| 498 transformer_list: An ordered list of function that takes an instance of | 526 transformer_list: An ordered list of function that takes an instance of |
| 499 SandwichRunner as parameter, would be applied immediately before | 527 SandwichRunner as parameter, would be applied immediately before |
| 500 SandwichRunner.Run() in the given order. | 528 SandwichRunner.Run() in the given order. |
| 501 | 529 |
| 502 Here is the full dependency of the added tree for the returned task: | 530 Here is the full dependency of the added tree for the returned task: |
| 503 <transformer_list_name>/<subresource_discoverer>-metrics.csv | 531 <transformer_list_name>/<subresource_discoverer>-metrics.csv |
| 504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ | 532 depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
| 505 depends on: common/<subresource_discoverer>-cache.zip | 533 depends on: common/<subresource_discoverer>-cache.zip |
| 506 depends on: some tasks saved by PopulateCommonPipelines() | 534 depends on: some tasks saved by PopulateCommonPipelines() |
|
pasko
2016/06/03 17:13:46
only depends on -setup.json, right?
gabadie
2016/06/06 09:43:14
Done.
| |
| 507 depends on: common/<subresource_discoverer>-setup.json | 535 depends on: common/<subresource_discoverer>-setup.json |
| 508 depends on: some tasks saved by PopulateCommonPipelines() | 536 depends on: some tasks saved by PopulateCommonPipelines() |
|
pasko
2016/06/03 17:13:46
some? should it just say common/patched-cache-validation.json?
gabadie
2016/06/06 09:43:14
Done.
| |
| 509 """ | 537 """ |
| 510 additional_column_names = [ | 538 additional_column_names = [ |
| 511 'url', | 539 'url', |
| 512 'repeat_id', | 540 'repeat_id', |
| 513 'subresource_discoverer', | 541 'subresource_discoverer', |
| 514 'subresource_count', | 542 'cache_recording.subresource_count', |
| 515 # The amount of subresources detected at SetupBenchmark step. | 543 'cache_recording.cached_subresource_count_theoretic', |
| 516 'subresource_count_theoretic', | 544 'cache_recording.cached_subresource_count', |
| 517 # Amount of subresources for caching as suggested by the subresource | 545 'benchmark_repeat.subresource_count', |
|
pasko
2016/06/03 17:13:46
'benchmark_repeat' sounds more like a command than
gabadie
2016/06/06 09:43:15
Done.
| |
| 518 # discoverer. | 546 'benchmark_repeat.served_from_cache_count_theoretic', |
| 519 'cached_subresource_count_theoretic', | 547 'benchmark_repeat.served_from_cache_count'] |
| 520 'cached_subresource_count'] | |
| 521 | 548 |
| 522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS | 549 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
| 523 assert 'common' not in SUBRESOURCE_DISCOVERERS | 550 assert 'common' not in SUBRESOURCE_DISCOVERERS |
| 524 shared_task_prefix = os.path.join('common', subresource_discoverer) | 551 shared_task_prefix = os.path.join('common', subresource_discoverer) |
| 525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) | 552 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
| 526 | 553 |
| 527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, | 554 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
| 528 dependencies=[self._subresources_for_urls_task]) | 555 dependencies=[self._cache_validation_task]) |
| 529 def SetupBenchmark(): | 556 def SetupBenchmark(): |
| 530 whitelisted_urls = _ExtractDiscoverableUrls( | 557 whitelisted_urls = _ExtractDiscoverableUrls( |
| 531 self._trace_from_grabbing_reference_cache, subresource_discoverer) | 558 self._trace_from_grabbing_reference_cache, subresource_discoverer) |
| 532 | 559 |
| 533 url_resources = json.load(open(self._subresources_for_urls_task.path)) | |
| 534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) | 560 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
| 535 with open(SetupBenchmark.path, 'w') as output: | 561 with open(SetupBenchmark.path, 'w') as output: |
| 536 json.dump({ | 562 json.dump({ |
| 537 'cache_whitelist': [url for url in whitelisted_urls], | 563 'cache_whitelist': [url for url in whitelisted_urls], |
| 538 'subresource_discoverer': subresource_discoverer, | 564 'subresource_discoverer': subresource_discoverer, |
| 539 'url_resources': url_resources, | |
| 540 }, output) | 565 }, output) |
| 541 | 566 |
| 542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, | 567 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
| 543 dependencies=[ | 568 dependencies=[SetupBenchmark]) |
| 544 SetupBenchmark, self._reference_cache_task]) | |
| 545 def BuildBenchmarkCacheArchive(): | 569 def BuildBenchmarkCacheArchive(): |
| 546 setup = json.load(open(SetupBenchmark.path)) | 570 benchmark_setup = json.load(open(SetupBenchmark.path)) |
| 547 chrome_cache.ApplyUrlWhitelistToCacheArchive( | 571 chrome_cache.ApplyUrlWhitelistToCacheArchive( |
| 548 cache_archive_path=self._reference_cache_task.path, | 572 cache_archive_path=self._cache_task.path, |
| 549 whitelisted_urls=setup['cache_whitelist'], | 573 whitelisted_urls=benchmark_setup['cache_whitelist'], |
| 550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) | 574 output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
| 551 | 575 |
| 552 @self.RegisterTask(task_prefix + '-run/', | 576 @self.RegisterTask(task_prefix + '-run/', |
| 553 dependencies=[BuildBenchmarkCacheArchive]) | 577 dependencies=[BuildBenchmarkCacheArchive]) |
| 554 def RunBenchmark(): | 578 def RunBenchmark(): |
| 555 runner = self._common_builder.CreateSandwichRunner() | 579 runner = self._common_builder.CreateSandwichRunner() |
| 556 for transformer in transformer_list: | 580 for transformer in transformer_list: |
| 557 transformer(runner) | 581 transformer(runner) |
| 558 runner.wpr_archive_path = self._patched_wpr_task.path | 582 runner.wpr_archive_path = self._patched_wpr_task.path |
| 559 runner.wpr_out_log_path = os.path.join( | 583 runner.wpr_out_log_path = os.path.join( |
| 560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) | 584 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
| 561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path | 585 runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
| 562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH | 586 runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
| 563 runner.output_dir = RunBenchmark.path | 587 runner.output_dir = RunBenchmark.path |
| 564 runner.Run() | 588 runner.Run() |
| 565 | 589 |
| 566 @self.RegisterTask(task_prefix + '-metrics.csv', | 590 @self.RegisterTask(task_prefix + '-metrics.csv', |
| 567 dependencies=[RunBenchmark]) | 591 dependencies=[RunBenchmark]) |
| 568 def ExtractMetrics(): | 592 def ProcessRunOutputDir(): |
| 569 # TODO(gabadie): Performance improvement: load each trace only once and | 593 benchmark_setup = json.load(open(SetupBenchmark.path)) |
| 570 # use it for validation and extraction of metrics later. | 594 cache_validation_result = json.load( |
| 571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) | 595 open(self._cache_validation_task.path)) |
| 572 | 596 |
| 573 benchmark_setup = json.load(open(SetupBenchmark.path)) | 597 run_metrics_list = _ProcessRunOutputDir( |
| 574 run_metrics_list = [] | 598 cache_validation_result, benchmark_setup, RunBenchmark.path) |
| 575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | 599 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
| 576 RunBenchmark.path): | |
| 577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
| 578 logging.info('processing trace: %s', trace_path) | |
| 579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
| 580 run_metrics = { | |
| 581 'url': trace.url, | |
| 582 'repeat_id': repeat_id, | |
| 583 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
| 584 'subresource_count': len(_ListUrlRequests( | |
| 585 trace, _RequestOutcome.All)), | |
| 586 'subresource_count_theoretic': | |
| 587 len(benchmark_setup['url_resources']), | |
| 588 'cached_subresource_count': len(_ListUrlRequests( | |
| 589 trace, _RequestOutcome.ServedFromCache)), | |
| 590 'cached_subresource_count_theoretic': | |
| 591 len(benchmark_setup['cache_whitelist']), | |
| 592 } | |
| 593 run_metrics.update( | |
| 594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
| 595 repeat_dir, trace)) | |
| 596 run_metrics_list.append(run_metrics) | |
| 597 | |
| 598 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
| 599 with open(ExtractMetrics.path, 'w') as csv_file: | |
| 600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
| 601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
| 602 writer.writeheader() | 602 writer.writeheader() |
| 603 for trace_metrics in run_metrics_list: | 603 for trace_metrics in run_metrics_list: |
| 604 writer.writerow(trace_metrics) | 604 writer.writerow(trace_metrics) |
| 605 | 605 |
| 606 self._common_builder.default_final_tasks.append(ExtractMetrics) | 606 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
| OLD | NEW |