OLD | NEW |
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
| 5 """ |
| 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
| 7 Noticeable steps of the task pipeline: |
| 8 * Save a WPR archive |
| 9 * Process the WPR archive to make all resources cacheable |
| 10 * Process cache archive to patch response headers back to their original |
| 11 values. |
| 12 * Find out which resources are discoverable by NoState Prefetch |
| 13 (HTMLPreloadScanner) |
| 14 * Load pages with empty/full/prefetched cache |
| 15 * Extract most important metrics to a CSV |
| 16 """ |
| 17 |
5 import csv | 18 import csv |
6 import logging | 19 import logging |
7 import json | 20 import json |
8 import os | 21 import os |
9 import re | 22 import re |
10 import shutil | 23 import shutil |
11 from urlparse import urlparse | 24 from urlparse import urlparse |
12 | 25 |
13 import chrome_cache | 26 import chrome_cache |
14 import common_util | 27 import common_util |
(...skipping 176 matching lines...)
191 elif subresource_discoverer == PARSER_DISCOVERER: | 204 elif subresource_discoverer == PARSER_DISCOVERER: |
192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 205 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
193 first_resource_request, dependencies_lens) | 206 first_resource_request, dependencies_lens) |
194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 207 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
195 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 208 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
196 first_resource_request, dependencies_lens, trace) | 209 first_resource_request, dependencies_lens, trace) |
197 else: | 210 else: |
198 assert False | 211 assert False |
199 | 212 |
200 whitelisted_urls = set() | 213 whitelisted_urls = set() |
201 logging.info('white-listing %s' % first_resource_request.url) | |
202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): | 214 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
203 logging.info('white-listing %s' % request.url) | 215 logging.debug('white-listing %s', request.url) |
204 whitelisted_urls.add(request.url) | 216 whitelisted_urls.add(request.url) |
| 217 logging.info('number of white-listed resources: %d', len(whitelisted_urls)) |
205 return whitelisted_urls | 218 return whitelisted_urls |
206 | 219 |
207 | 220 |
208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | 221 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
209 """Compare URL sets and log the diffs. | 222 """Compare URL sets and log the diffs. |
210 | 223 |
211 Args: | 224 Args: |
212 ref_url_set: Set of reference urls. | 225 ref_url_set: Set of reference urls. |
213 url_set: Set of urls to compare to the reference. | 226 url_set: Set of urls to compare to the reference. |
214 url_set_name: The set name for logging purposes. | 227 url_set_name: The set name for logging purposes. |
(...skipping 39 matching lines...)
254 request_event.method.upper().strip() == 'POST'): | 267 request_event.method.upper().strip() == 'POST'): |
255 urls.add(request_event.url) | 268 urls.add(request_event.url) |
256 elif (request_kind == _RequestOutcome.NotServedFromCache and | 269 elif (request_kind == _RequestOutcome.NotServedFromCache and |
257 not request_event.from_disk_cache): | 270 not request_event.from_disk_cache): |
258 urls.add(request_event.url) | 271 urls.add(request_event.url) |
259 elif request_kind == _RequestOutcome.All: | 272 elif request_kind == _RequestOutcome.All: |
260 urls.add(request_event.url) | 273 urls.add(request_event.url) |
261 return urls | 274 return urls |
262 | 275 |
263 | 276 |
264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, | 277 class _RunOutputVerifier(object): |
265 benchmark_output_directory_path): | 278 """Object to verify benchmark run from traces and WPR log stored in the |
266 """Verifies that all run inside the run_output_directory worked as expected. | 279 runner output directory. |
| 280 """ |
267 | 281 |
268 Args: | 282 def __init__(self, cache_validation_result, benchmark_setup): |
269 benchmark_setup_path: Path of the JSON of the benchmark setup. | 283 """Constructor. |
270 benchmark_output_directory_path: Path of the benchmark output directory to | |
271 verify. | |
272 """ | |
273 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
274 benchmark_setup = json.load(open(benchmark_setup_path)) | |
275 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
276 original_requests = set(benchmark_setup['url_resources']) | |
277 original_cached_requests = original_requests.intersection(cache_whitelist) | |
278 original_uncached_requests = original_requests.difference(cache_whitelist) | |
279 all_sent_url_requests = set() | |
280 | 284 |
281 # Verify requests from traces. | 285 Args: |
282 run_id = -1 | 286 cache_validation_result: JSON of the cache validation task. |
283 while True: | 287 benchmark_setup: JSON of the benchmark setup. |
284 run_id += 1 | 288 """ |
285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | 289 self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
286 if not os.path.isdir(run_path): | 290 self._original_requests = set(cache_validation_result['effective_requests']) |
287 break | 291 self._original_post_requests = set( |
288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | 292 cache_validation_result['effective_post_requests']) |
289 if not os.path.isfile(trace_path): | 293 self._original_cached_requests = self._original_requests.intersection( |
290 logging.error('missing trace %s' % trace_path) | 294 self._cache_whitelist) |
291 continue | 295 self._original_uncached_requests = self._original_requests.difference( |
292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 296 self._cache_whitelist) |
293 logging.info('verifying %s from %s' % (trace.url, trace_path)) | 297 self._all_sent_url_requests = set() |
294 | 298 |
| 299 def VerifyTrace(self, trace): |
| 300 """Verifies a trace with the cache validation result and the benchmark |
| 301 setup. |
| 302 """ |
295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 303 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 304 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
297 effective_cached_requests = \ | 305 effective_cached_requests = \ |
298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) | 306 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) |
299 effective_uncached_requests = \ | 307 effective_uncached_requests = \ |
300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) | 308 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) |
301 | 309 |
302 missing_requests = original_requests.difference(effective_requests) | 310 missing_requests = self._original_requests.difference(effective_requests) |
303 unexpected_requests = effective_requests.difference(original_requests) | 311 unexpected_requests = effective_requests.difference(self._original_requests) |
304 expected_cached_requests = \ | 312 expected_cached_requests = \ |
305 original_cached_requests.difference(missing_requests) | 313 self._original_cached_requests.difference(missing_requests) |
306 missing_cached_requests = \ | 314 expected_uncached_requests = self._original_uncached_requests.union( |
307 expected_cached_requests.difference(effective_cached_requests) | 315 unexpected_requests).difference(missing_requests) |
308 expected_uncached_requests = original_uncached_requests.union( | |
309 unexpected_requests).union(missing_cached_requests) | |
310 all_sent_url_requests.update(effective_uncached_requests) | |
311 | 316 |
312 # POST requests are known to be unable to use the cache. | 317 # POST requests are known to be unable to use the cache. |
313 expected_cached_requests.difference_update(effective_post_requests) | 318 expected_cached_requests.difference_update(effective_post_requests) |
314 expected_uncached_requests.update(effective_post_requests) | 319 expected_uncached_requests.update(effective_post_requests) |
315 | 320 |
316 _PrintUrlSetComparison(original_requests, effective_requests, | 321 _PrintUrlSetComparison(self._original_requests, effective_requests, |
317 'All resources') | 322 'All resources') |
318 _PrintUrlSetComparison(set(), effective_post_requests, | 323 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
319 'POST resources') | |
320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, | 324 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
321 'Cached resources') | 325 'Cached resources') |
322 _PrintUrlSetComparison(expected_uncached_requests, | 326 _PrintUrlSetComparison(expected_uncached_requests, |
323 effective_uncached_requests, 'Non cached resources') | 327 effective_uncached_requests, 'Non cached resources') |
324 | 328 |
325 # Verify requests from WPR. | 329 self._all_sent_url_requests.update(effective_uncached_requests) |
326 wpr_log_path = os.path.join( | |
327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME) | |
328 logging.info('verifying requests from %s' % wpr_log_path) | |
329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) | |
330 all_wpr_urls = set() | |
331 unserved_wpr_urls = set() | |
332 wpr_command_colliding_urls = set() | |
333 | 330 |
334 for request in all_wpr_requests: | 331 def VerifyWprLog(self, wpr_log_path): |
335 if request.is_wpr_host: | 332 """Verifies WPR log with previously verified traces.""" |
336 continue | 333 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
337 if urlparse(request.url).path.startswith('/web-page-replay'): | 334 all_wpr_urls = set() |
338 wpr_command_colliding_urls.add(request.url) | 335 unserved_wpr_urls = set() |
339 elif request.is_served is False: | 336 wpr_command_colliding_urls = set() |
340 unserved_wpr_urls.add(request.url) | |
341 all_wpr_urls.add(request.url) | |
342 | 337 |
343 _PrintUrlSetComparison(set(), unserved_wpr_urls, | 338 for request in all_wpr_requests: |
344 'Distinct unserved resources from WPR') | 339 if request.is_wpr_host: |
345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, | 340 continue |
346 'Distinct resources colliding to WPR commands') | 341 if urlparse(request.url).path.startswith('/web-page-replay'): |
347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, | 342 wpr_command_colliding_urls.add(request.url) |
348 'Distinct resource requests to WPR') | 343 elif request.is_served is False: |
| 344 unserved_wpr_urls.add(request.url) |
| 345 all_wpr_urls.add(request.url) |
349 | 346 |
350 | 347 _PrintUrlSetComparison(set(), unserved_wpr_urls, |
351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): | 348 'Distinct unserved resources from WPR') |
352 """Extracts a list of subresources in runner output directory. | 349 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
353 | 350 'Distinct resources colliding to WPR commands') |
354 Args: | 351 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
355 runner_output_dir: Path of the runner's output directory. | 352 'Distinct resource requests to WPR') |
356 | |
357 Returns: | |
358 [URLs of sub-resources] | |
359 """ | |
360 trace_path = os.path.join( | |
361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME) | |
362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
363 url_set = set() | |
364 for request_event in _FilterOutDataAndIncompleteRequests( | |
365 trace.request_track.GetEvents()): | |
366 url_set.add(request_event.url) | |
367 logging.info('lists %s resources of %s from %s' % \ | |
368 (len(url_set), trace.url, trace_path)) | |
369 return [url for url in url_set] | |
370 | 353 |
371 | 354 |
372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): | 355 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
373 """Validates a cache archive content. | 356 """Validates a cache archive content. |
374 | 357 |
375 Args: | 358 Args: |
376 cache_build_trace_path: Path of the generated trace at the cache build time. | 359 cache_build_trace_path: Path of the generated trace at the cache build time. |
377 cache_archive_path: Cache archive's path to validate. | 360 cache_archive_path: Cache archive's path to validate. |
| 361 |
| 362 Returns: |
| 363 { |
| 364 'effective_requests': [URLs of all requests], |
| 365 'effective_post_requests': [URLs of POST requests], |
| 366 'expected_cached_resources': [URLs of resources expected to be cached], |
| 367 'successfully_cached': [URLs of cached sub-resources] |
| 368 } |
378 """ | 369 """ |
379 # TODO(gabadie): What's the best way of propagating errors happening in here? | 370 # TODO(gabadie): What's the best way of propagating errors happening in here? |
380 logging.info('lists cached urls from %s' % cache_archive_path) | 371 logging.info('lists cached urls from %s' % cache_archive_path) |
381 with common_util.TemporaryDirectory() as cache_directory: | 372 with common_util.TemporaryDirectory() as cache_directory: |
382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 373 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
383 cache_keys = set( | 374 cache_keys = set( |
384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 375 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 376 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 377 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 378 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
(...skipping 10 matching lines...)
398 expected_cached_requests = effective_requests.difference( | 389 expected_cached_requests = effective_requests.difference( |
399 effective_post_requests) | 390 effective_post_requests) |
400 effective_cache_keys = cache_keys.difference( | 391 effective_cache_keys = cache_keys.difference( |
401 upload_data_stream_cache_entry_keys) | 392 upload_data_stream_cache_entry_keys) |
402 | 393 |
403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, | 394 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
404 'POST resources') | 395 'POST resources') |
405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, | 396 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
406 'Cached resources') | 397 'Cached resources') |
407 | 398 |
| 399 return { |
| 400 'effective_requests': [url for url in effective_requests], |
| 401 'effective_post_requests': [url for url in effective_post_requests], |
| 402 'expected_cached_resources': [url for url in expected_cached_requests], |
| 403 'successfully_cached_resources': [url for url in effective_cache_keys] |
| 404 } |
| 405 |
| 406 |
| 407 def _ProcessRunOutputDir( |
| 408 cache_validation_result, benchmark_setup, runner_output_dir): |
| 409 """Process benchmark's run output directory. |
| 410 |
| 411 Args: |
| 412 cache_validation_result: Same as for _RunOutputVerifier |
| 413 benchmark_setup: Same as for _RunOutputVerifier |
| 414 runner_output_dir: Same as for SandwichRunner.output_dir |
| 415 |
| 416 Returns: |
| 417 List of dictionaries. |
| 418 """ |
| 419 run_metrics_list = [] |
| 420 run_output_verifier = _RunOutputVerifier( |
| 421 cache_validation_result, benchmark_setup) |
| 422 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( |
| 423 runner_output_dir): |
| 424 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) |
| 425 |
| 426 logging.info('loading trace: %s', trace_path) |
| 427 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
| 428 |
| 429 logging.info('verifying trace: %s', trace_path) |
| 430 run_output_verifier.VerifyTrace(trace) |
| 431 |
| 432 logging.info('extracting metrics from trace: %s', trace_path) |
| 433 run_metrics = { |
| 434 'url': trace.url, |
| 435 'repeat_id': repeat_id, |
| 436 'subresource_discoverer': benchmark_setup['subresource_discoverer'], |
| 437 'cache_recording.subresource_count': |
| 438 len(cache_validation_result['effective_requests']), |
| 439 'cache_recording.cached_subresource_count_theoretic': |
| 440 len(cache_validation_result['successfully_cached_resources']), |
| 441 'cache_recording.cached_subresource_count': |
| 442 len(cache_validation_result['expected_cached_resources']), |
| 443 'benchmark.subresource_count': len(_ListUrlRequests( |
| 444 trace, _RequestOutcome.All)), |
| 445 'benchmark.served_from_cache_count_theoretic': |
| 446 len(benchmark_setup['cache_whitelist']), |
| 447 'benchmark.served_from_cache_count': len(_ListUrlRequests( |
| 448 trace, _RequestOutcome.ServedFromCache)), |
| 449 } |
| 450 run_metrics.update( |
| 451 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( |
| 452 repeat_dir, trace)) |
| 453 run_metrics_list.append(run_metrics) |
| 454 run_metrics_list.sort(key=lambda e: e['repeat_id']) |
| 455 |
| 456 wpr_log_path = os.path.join( |
| 457 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) |
| 458 logging.info('verifying wpr log: %s', wpr_log_path) |
| 459 run_output_verifier.VerifyWprLog(wpr_log_path) |
| 460 return run_metrics_list |
| 461 |
408 | 462 |
409 class PrefetchBenchmarkBuilder(task_manager.Builder): | 463 class PrefetchBenchmarkBuilder(task_manager.Builder): |
410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" | 464 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
411 | 465 |
412 def __init__(self, common_builder): | 466 def __init__(self, common_builder): |
413 task_manager.Builder.__init__(self, | 467 task_manager.Builder.__init__(self, |
414 common_builder.output_directory, | 468 common_builder.output_directory, |
415 common_builder.output_subdirectory) | 469 common_builder.output_subdirectory) |
416 self._common_builder = common_builder | 470 self._common_builder = common_builder |
417 | 471 |
418 self._patched_wpr_task = None | 472 self._wpr_archive_path = None |
419 self._reference_cache_task = None | 473 self._cache_path = None |
420 self._trace_from_grabbing_reference_cache = None | 474 self._trace_from_grabbing_reference_cache = None |
421 self._subresources_for_urls_task = None | 475 self._cache_validation_task = None |
422 self._PopulateCommonPipelines() | 476 self._PopulateCommonPipelines() |
423 | 477 |
424 def _PopulateCommonPipelines(self): | 478 def _PopulateCommonPipelines(self): |
425 """Creates necessary tasks to produce initial cache archive. | 479 """Creates necessary tasks to produce initial cache archive. |
426 | 480 |
427 Also creates a task for producing a json file with a mapping of URLs to | 481 Also creates a task for producing a json file with a mapping of URLs to |
428 subresources (urls-resources.json). | 482 subresources (urls-resources.json). |
429 | 483 |
430 Here is the full dependency tree for the returned task: | 484 Here is the full dependency tree for the returned task: |
431 common/patched-cache-validation.log | 485 common/patched-cache-validation.json |
432 depends on: common/patched-cache.zip | 486 depends on: common/patched-cache.zip |
433 depends on: common/original-cache.zip | 487 depends on: common/original-cache.zip |
434 depends on: common/webpages-patched.wpr | 488 depends on: common/webpages-patched.wpr |
435 depends on: common/webpages.wpr | 489 depends on: common/webpages.wpr |
436 depends on: common/urls-resources.json | |
437 depends on: common/original-cache.zip | |
438 """ | 490 """ |
439 @self.RegisterTask('common/webpages-patched.wpr', | 491 @self.RegisterTask('common/webpages-patched.wpr', |
440 dependencies=[self._common_builder.original_wpr_task]) | 492 dependencies=[self._common_builder.original_wpr_task]) |
441 def BuildPatchedWpr(): | 493 def BuildPatchedWpr(): |
442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 494 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
443 shutil.copyfile( | 495 shutil.copyfile( |
444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 496 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
445 _PatchWpr(BuildPatchedWpr.path) | 497 _PatchWpr(BuildPatchedWpr.path) |
446 | 498 |
447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 499 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
448 def BuildOriginalCache(): | 500 def BuildOriginalCache(): |
449 runner = self._common_builder.CreateSandwichRunner() | 501 runner = self._common_builder.CreateSandwichRunner() |
450 runner.wpr_archive_path = BuildPatchedWpr.path | 502 runner.wpr_archive_path = BuildPatchedWpr.path |
451 runner.cache_archive_path = BuildOriginalCache.path | 503 runner.cache_archive_path = BuildOriginalCache.path |
452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE | 504 runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
453 runner.output_dir = BuildOriginalCache.run_path | 505 runner.output_dir = BuildOriginalCache.run_path |
454 runner.Run() | 506 runner.Run() |
455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' | 507 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
456 original_cache_trace_path = os.path.join( | 508 original_cache_trace_path = os.path.join( |
457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) | 509 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
458 | 510 |
459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) | 511 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
460 def BuildPatchedCache(): | 512 def BuildPatchedCache(): |
461 _PatchCacheArchive(BuildOriginalCache.path, | 513 _PatchCacheArchive(BuildOriginalCache.path, |
462 original_cache_trace_path, BuildPatchedCache.path) | 514 original_cache_trace_path, BuildPatchedCache.path) |
463 | 515 |
464 @self.RegisterTask('common/subresources-for-urls.json', | 516 @self.RegisterTask('common/patched-cache-validation.json', |
465 [BuildOriginalCache]) | |
466 def ListUrlsResources(): | |
467 url_resources = _ReadSubresourceFromRunnerOutputDir( | |
468 BuildOriginalCache.run_path) | |
469 with open(ListUrlsResources.path, 'w') as output: | |
470 json.dump(url_resources, output) | |
471 | |
472 @self.RegisterTask('common/patched-cache-validation.log', | |
473 [BuildPatchedCache]) | 517 [BuildPatchedCache]) |
474 def ValidatePatchedCache(): | 518 def ValidatePatchedCache(): |
475 handler = logging.FileHandler(ValidatePatchedCache.path) | 519 cache_validation_result = _ValidateCacheArchiveContent( |
476 logging.getLogger().addHandler(handler) | 520 original_cache_trace_path, BuildPatchedCache.path) |
477 try: | 521 with open(ValidatePatchedCache.path, 'w') as output: |
478 _ValidateCacheArchiveContent( | 522 json.dump(cache_validation_result, output) |
479 original_cache_trace_path, BuildPatchedCache.path) | |
480 finally: | |
481 logging.getLogger().removeHandler(handler) | |
482 | 523 |
483 self._patched_wpr_task = BuildPatchedWpr | 524 self._wpr_archive_path = BuildPatchedWpr.path |
484 self._trace_from_grabbing_reference_cache = original_cache_trace_path | 525 self._trace_from_grabbing_reference_cache = original_cache_trace_path |
485 self._reference_cache_task = BuildPatchedCache | 526 self._cache_path = BuildPatchedCache.path |
486 self._subresources_for_urls_task = ListUrlsResources | 527 self._cache_validation_task = ValidatePatchedCache |
487 | 528 |
488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) | 529 self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
489 | 530 |
490 def PopulateLoadBenchmark(self, subresource_discoverer, | 531 def PopulateLoadBenchmark(self, subresource_discoverer, |
491 transformer_list_name, transformer_list): | 532 transformer_list_name, transformer_list): |
492 """Populate benchmarking tasks from its setup tasks. | 533 """Populate benchmarking tasks from its setup tasks. |
493 | 534 |
494 Args: | 535 Args: |
495 subresource_discoverer: Name of a subresources discoverer. | 536 subresource_discoverer: Name of a subresources discoverer. |
496 transformer_list_name: A string describing the transformers, will be used | 537 transformer_list_name: A string describing the transformers, will be used |
497 in Task names (prefer names without spaces and special characters). | 538 in Task names (prefer names without spaces and special characters). |
498 transformer_list: An ordered list of function that takes an instance of | 539 transformer_list: An ordered list of function that takes an instance of |
499 SandwichRunner as parameter, would be applied immediately before | 540 SandwichRunner as parameter, would be applied immediately before |
500 SandwichRunner.Run() in the given order. | 541 SandwichRunner.Run() in the given order. |
501 | 542 |
502 Here is the full dependency of the added tree for the returned task: | 543 Here is the full dependency of the added tree for the returned task: |
503 <transformer_list_name>/<subresource_discoverer>-metrics.csv | 544 <transformer_list_name>/<subresource_discoverer>-metrics.csv |
504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ | 545 depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
505 depends on: common/<subresource_discoverer>-cache.zip | 546 depends on: common/<subresource_discoverer>-cache.zip |
506 depends on: some tasks saved by PopulateCommonPipelines() | |
507 depends on: common/<subresource_discoverer>-setup.json | 547 depends on: common/<subresource_discoverer>-setup.json |
508 depends on: some tasks saved by PopulateCommonPipelines() | 548 depends on: common/patched-cache-validation.json |
509 """ | 549 """ |
510 additional_column_names = [ | 550 additional_column_names = [ |
511 'url', | 551 'url', |
512 'repeat_id', | 552 'repeat_id', |
513 'subresource_discoverer', | 553 'subresource_discoverer', |
514 'subresource_count', | 554 'cache_recording.subresource_count', |
515 # The amount of subresources detected at SetupBenchmark step. | 555 'cache_recording.cached_subresource_count_theoretic', |
516 'subresource_count_theoretic', | 556 'cache_recording.cached_subresource_count', |
517 # Amount of subresources for caching as suggested by the subresource | 557 'benchmark.subresource_count', |
518 # discoverer. | 558 'benchmark.served_from_cache_count_theoretic', |
519 'cached_subresource_count_theoretic', | 559 'benchmark.served_from_cache_count'] |
520 'cached_subresource_count'] | |
521 | 560 |
522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS | 561 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
523 assert 'common' not in SUBRESOURCE_DISCOVERERS | 562 assert 'common' not in SUBRESOURCE_DISCOVERERS |
524 shared_task_prefix = os.path.join('common', subresource_discoverer) | 563 shared_task_prefix = os.path.join('common', subresource_discoverer) |
525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) | 564 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
526 | 565 |
527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, | 566 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
528 dependencies=[self._subresources_for_urls_task]) | 567 dependencies=[self._cache_validation_task]) |
529 def SetupBenchmark(): | 568 def SetupBenchmark(): |
530 whitelisted_urls = _ExtractDiscoverableUrls( | 569 whitelisted_urls = _ExtractDiscoverableUrls( |
531 self._trace_from_grabbing_reference_cache, subresource_discoverer) | 570 self._trace_from_grabbing_reference_cache, subresource_discoverer) |
532 | 571 |
533 url_resources = json.load(open(self._subresources_for_urls_task.path)) | |
534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) | 572 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
535 with open(SetupBenchmark.path, 'w') as output: | 573 with open(SetupBenchmark.path, 'w') as output: |
536 json.dump({ | 574 json.dump({ |
537 'cache_whitelist': [url for url in whitelisted_urls], | 575 'cache_whitelist': [url for url in whitelisted_urls], |
538 'subresource_discoverer': subresource_discoverer, | 576 'subresource_discoverer': subresource_discoverer, |
539 'url_resources': url_resources, | |
540 }, output) | 577 }, output) |
541 | 578 |
542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, | 579 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
543 dependencies=[ | 580 dependencies=[SetupBenchmark]) |
544 SetupBenchmark, self._reference_cache_task]) | |
545 def BuildBenchmarkCacheArchive(): | 581 def BuildBenchmarkCacheArchive(): |
546 setup = json.load(open(SetupBenchmark.path)) | 582 benchmark_setup = json.load(open(SetupBenchmark.path)) |
547 chrome_cache.ApplyUrlWhitelistToCacheArchive( | 583 chrome_cache.ApplyUrlWhitelistToCacheArchive( |
548 cache_archive_path=self._reference_cache_task.path, | 584 cache_archive_path=self._cache_path, |
549 whitelisted_urls=setup['cache_whitelist'], | 585 whitelisted_urls=benchmark_setup['cache_whitelist'], |
550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) | 586 output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
551 | 587 |
552 @self.RegisterTask(task_prefix + '-run/', | 588 @self.RegisterTask(task_prefix + '-run/', |
553 dependencies=[BuildBenchmarkCacheArchive]) | 589 dependencies=[BuildBenchmarkCacheArchive]) |
554 def RunBenchmark(): | 590 def RunBenchmark(): |
555 runner = self._common_builder.CreateSandwichRunner() | 591 runner = self._common_builder.CreateSandwichRunner() |
556 for transformer in transformer_list: | 592 for transformer in transformer_list: |
557 transformer(runner) | 593 transformer(runner) |
558 runner.wpr_archive_path = self._patched_wpr_task.path | 594 runner.wpr_archive_path = self._wpr_archive_path |
559 runner.wpr_out_log_path = os.path.join( | 595 runner.wpr_out_log_path = os.path.join( |
560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) | 596 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path | 597 runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH | 598 runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
563 runner.output_dir = RunBenchmark.path | 599 runner.output_dir = RunBenchmark.path |
564 runner.Run() | 600 runner.Run() |
565 | 601 |
566 @self.RegisterTask(task_prefix + '-metrics.csv', | 602 @self.RegisterTask(task_prefix + '-metrics.csv', |
567 dependencies=[RunBenchmark]) | 603 dependencies=[RunBenchmark]) |
568 def ExtractMetrics(): | 604 def ProcessRunOutputDir(): |
569 # TODO(gabadie): Performance improvement: load each trace only once and | 605 benchmark_setup = json.load(open(SetupBenchmark.path)) |
570 # use it for validation and extraction of metrics later. | 606 cache_validation_result = json.load( |
571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) | 607 open(self._cache_validation_task.path)) |
572 | 608 |
573 benchmark_setup = json.load(open(SetupBenchmark.path)) | 609 run_metrics_list = _ProcessRunOutputDir( |
574 run_metrics_list = [] | 610 cache_validation_result, benchmark_setup, RunBenchmark.path) |
575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | 611 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
576 RunBenchmark.path): | |
577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
578 logging.info('processing trace: %s', trace_path) | |
579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
580 run_metrics = { | |
581 'url': trace.url, | |
582 'repeat_id': repeat_id, | |
583 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
584 'subresource_count': len(_ListUrlRequests( | |
585 trace, _RequestOutcome.All)), | |
586 'subresource_count_theoretic': | |
587 len(benchmark_setup['url_resources']), | |
588 'cached_subresource_count': len(_ListUrlRequests( | |
589 trace, _RequestOutcome.ServedFromCache)), | |
590 'cached_subresource_count_theoretic': | |
591 len(benchmark_setup['cache_whitelist']), | |
592 } | |
593 run_metrics.update( | |
594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
595 repeat_dir, trace)) | |
596 run_metrics_list.append(run_metrics) | |
597 | |
598 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
599 with open(ExtractMetrics.path, 'w') as csv_file: | |
600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 612 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 613 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
602 writer.writeheader() | 614 writer.writeheader() |
603 for trace_metrics in run_metrics_list: | 615 for trace_metrics in run_metrics_list: |
604 writer.writerow(trace_metrics) | 616 writer.writerow(trace_metrics) |
605 | 617 |
606 self._common_builder.default_final_tasks.append(ExtractMetrics) | 618 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
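Editorial note (not part of the CL above): the verification idea shared by the old _VerifyBenchmarkOutputDirectory and the new _RunOutputVerifier/_ProcessRunOutputDir pipeline is a plain set comparison between the resource URLs recorded when the cache was built and the URLs observed in each benchmark trace, reported through _PrintUrlSetComparison. The sketch below is a minimal, standalone illustration of that pattern; the helper name and sample URLs are hypothetical and are not taken from the Chromium sources.

import logging

def compare_url_sets(ref_url_set, url_set, url_set_name):
    """Logs how an observed URL set diverges from a reference set.

    Plays the role of _PrintUrlSetComparison in the diff above; the names
    used here are illustrative only, not the Chromium helpers themselves.
    """
    missing = ref_url_set - url_set       # expected but never requested
    unexpected = url_set - ref_url_set    # requested but not expected
    if not missing and not unexpected:
        logging.info('%s: no difference (%d urls)', url_set_name, len(url_set))
        return
    for url in sorted(missing):
        logging.warning('%s: missing %s', url_set_name, url)
    for url in sorted(unexpected):
        logging.warning('%s: unexpected %s', url_set_name, url)

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    expected_cached = {'https://example.com/a.js', 'https://example.com/b.css'}
    observed_cached = {'https://example.com/a.js', 'https://example.com/c.png'}
    compare_url_sets(expected_cached, observed_cached, 'Cached resources')

In the CL itself this comparison is applied several times per trace (all, POST, cached and non-cached resources) and again against the WPR log, which is why the real helper takes a set name purely for logging.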