OLD | NEW |
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
| 5 """ |
| 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
| 7 Noticeable steps of the task pipeline: |
| 8 * Save a WPR archive |
| 9 * Process the WPR archive to make all resources cacheable |
| 10 * Process cache archive to patch response headers back to their original |
| 11 values. |
| 12 * Find out which resources are discoverable by NoState Prefetch |
| 13 (HTMLPreloadScanner) |
| 14 * Load pages with empty/full/prefetched cache |
| 15 * Extract most important metrics to a CSV |
| 16 """ |
| 17 |
5 import csv | 18 import csv |
6 import logging | 19 import logging |
7 import json | 20 import json |
8 import os | 21 import os |
9 import re | 22 import re |
10 import shutil | 23 import shutil |
11 from urlparse import urlparse | 24 from urlparse import urlparse |
12 | 25 |
13 import chrome_cache | 26 import chrome_cache |
14 import common_util | 27 import common_util |
(...skipping 176 matching lines...)
191 elif subresource_discoverer == PARSER_DISCOVERER: | 204 elif subresource_discoverer == PARSER_DISCOVERER: |
192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 205 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
193 first_resource_request, dependencies_lens) | 206 first_resource_request, dependencies_lens) |
194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 207 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
195 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 208 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
196 first_resource_request, dependencies_lens, trace) | 209 first_resource_request, dependencies_lens, trace) |
197 else: | 210 else: |
198 assert False | 211 assert False |
199 | 212 |
200 whitelisted_urls = set() | 213 whitelisted_urls = set() |
201 logging.info('white-listing %s' % first_resource_request.url) | |
202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): | 214 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
203 logging.info('white-listing %s' % request.url) | 215 logging.debug('white-listing %s', request.url) |
204 whitelisted_urls.add(request.url) | 216 whitelisted_urls.add(request.url) |
| 217 logging.info('number of white-listed resources: %d', len(whitelisted_urls)) |
205 return whitelisted_urls | 218 return whitelisted_urls |
206 | 219 |
207 | 220 |
208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | 221 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
209 """Compare URL sets and log the diffs. | 222 """Compare URL sets and log the diffs. |
210 | 223 |
211 Args: | 224 Args: |
212 ref_url_set: Set of reference urls. | 225 ref_url_set: Set of reference urls. |
213 url_set: Set of urls to compare to the reference. | 226 url_set: Set of urls to compare to the reference. |
214 url_set_name: The set name for logging purposes. | 227 url_set_name: The set name for logging purposes. |
(...skipping 39 matching lines...)
254 request_event.method.upper().strip() == 'POST'): | 267 request_event.method.upper().strip() == 'POST'): |
255 urls.add(request_event.url) | 268 urls.add(request_event.url) |
256 elif (request_kind == _RequestOutcome.NotServedFromCache and | 269 elif (request_kind == _RequestOutcome.NotServedFromCache and |
257 not request_event.from_disk_cache): | 270 not request_event.from_disk_cache): |
258 urls.add(request_event.url) | 271 urls.add(request_event.url) |
259 elif request_kind == _RequestOutcome.All: | 272 elif request_kind == _RequestOutcome.All: |
260 urls.add(request_event.url) | 273 urls.add(request_event.url) |
261 return urls | 274 return urls |
262 | 275 |
263 | 276 |
264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, | 277 class _RunOutputVerifier(object): |
265 benchmark_output_directory_path): | 278 """Object to verify benchmark run from traces and WPR log stored in the |
266 """Verifies that all run inside the run_output_directory worked as expected. | 279 runner output directory. |
| 280 """ |
267 | 281 |
268 Args: | 282 def __init__(self, cache_validation_result, benchmark_setup): |
269 benchmark_setup_path: Path of the JSON of the benchmark setup. | 283 """Constructor. |
270 benchmark_output_directory_path: Path of the benchmark output directory to | |
271 verify. | |
272 """ | |
273 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
274 benchmark_setup = json.load(open(benchmark_setup_path)) | |
275 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
276 original_requests = set(benchmark_setup['url_resources']) | |
277 original_cached_requests = original_requests.intersection(cache_whitelist) | |
278 original_uncached_requests = original_requests.difference(cache_whitelist) | |
279 all_sent_url_requests = set() | |
280 | 284 |
281 # Verify requests from traces. | 285 Args: |
282 run_id = -1 | 286 cache_validation_result: JSON of the cache validation task. |
283 while True: | 287 benchmark_setup: JSON of the benchmark setup. |
284 run_id += 1 | 288 """ |
285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | 289 self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
286 if not os.path.isdir(run_path): | 290 self._original_requests = set(cache_validation_result['effective_requests']) |
287 break | 291 self._original_post_requests = set( |
288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | 292 cache_validation_result['effective_post_requests']) |
289 if not os.path.isfile(trace_path): | 293 self._original_cached_requests = self._original_requests.intersection( |
290 logging.error('missing trace %s' % trace_path) | 294 self._cache_whitelist) |
291 continue | 295 self._original_uncached_requests = self._original_requests.difference( |
292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 296 self._cache_whitelist) |
293 logging.info('verifying %s from %s' % (trace.url, trace_path)) | 297 self._all_sent_url_requests = set() |
294 | 298 |
| 299 def VerifyTrace(self, trace): |
| 300 """Verifies a trace with the cache validation result and the benchmark |
| 301 setup. |
| 302 """ |
295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 303 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 304 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
297 effective_cached_requests = \ | 305 effective_cached_requests = \ |
298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) | 306 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) |
299 effective_uncached_requests = \ | 307 effective_uncached_requests = \ |
300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) | 308 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) |
301 | 309 |
302 missing_requests = original_requests.difference(effective_requests) | 310 missing_requests = self._original_requests.difference(effective_requests) |
303 unexpected_requests = effective_requests.difference(original_requests) | 311 unexpected_requests = effective_requests.difference(self._original_requests) |
304 expected_cached_requests = \ | 312 expected_cached_requests = \ |
305 original_cached_requests.difference(missing_requests) | 313 self._original_cached_requests.difference(missing_requests) |
306 missing_cached_requests = \ | 314 expected_uncached_requests = self._original_uncached_requests.union( |
307 expected_cached_requests.difference(effective_cached_requests) | 315 unexpected_requests).difference(missing_requests) |
308 expected_uncached_requests = original_uncached_requests.union( | |
309 unexpected_requests).union(missing_cached_requests) | |
310 all_sent_url_requests.update(effective_uncached_requests) | |
311 | 316 |
312 # POST requests are known to be unable to use the cache. | 317 # POST requests are known to be unable to use the cache. |
313 expected_cached_requests.difference_update(effective_post_requests) | 318 expected_cached_requests.difference_update(effective_post_requests) |
314 expected_uncached_requests.update(effective_post_requests) | 319 expected_uncached_requests.update(effective_post_requests) |
315 | 320 |
316 _PrintUrlSetComparison(original_requests, effective_requests, | 321 _PrintUrlSetComparison(self._original_requests, effective_requests, |
317 'All resources') | 322 'All resources') |
318 _PrintUrlSetComparison(set(), effective_post_requests, | 323 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
319 'POST resources') | |
320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, | 324 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
321 'Cached resources') | 325 'Cached resources') |
322 _PrintUrlSetComparison(expected_uncached_requests, | 326 _PrintUrlSetComparison(expected_uncached_requests, |
323 effective_uncached_requests, 'Non cached resources') | 327 effective_uncached_requests, 'Non cached resources') |
324 | 328 |
325 # Verify requests from WPR. | 329 self._all_sent_url_requests.update(effective_uncached_requests) |
326 wpr_log_path = os.path.join( | |
327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME) | |
328 logging.info('verifying requests from %s' % wpr_log_path) | |
329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) | |
330 all_wpr_urls = set() | |
331 unserved_wpr_urls = set() | |
332 wpr_command_colliding_urls = set() | |
333 | 330 |
334 for request in all_wpr_requests: | 331 def VerifyWprLog(self, wpr_log_path): |
335 if request.is_wpr_host: | 332 """Verifies WPR log with previously verified traces.""" |
336 continue | 333 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
337 if urlparse(request.url).path.startswith('/web-page-replay'): | 334 all_wpr_urls = set() |
338 wpr_command_colliding_urls.add(request.url) | 335 unserved_wpr_urls = set() |
339 elif request.is_served is False: | 336 wpr_command_colliding_urls = set() |
340 unserved_wpr_urls.add(request.url) | |
341 all_wpr_urls.add(request.url) | |
342 | 337 |
343 _PrintUrlSetComparison(set(), unserved_wpr_urls, | 338 for request in all_wpr_requests: |
344 'Distinct unserved resources from WPR') | 339 if request.is_wpr_host: |
345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, | 340 continue |
346 'Distinct resources colliding to WPR commands') | 341 if urlparse(request.url).path.startswith('/web-page-replay'): |
347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, | 342 wpr_command_colliding_urls.add(request.url) |
348 'Distinct resource requests to WPR') | 343 elif request.is_served is False: |
| 344 unserved_wpr_urls.add(request.url) |
| 345 all_wpr_urls.add(request.url) |
349 | 346 |
350 | 347 _PrintUrlSetComparison(set(), unserved_wpr_urls, |
351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): | 348 'Distinct unserved resources from WPR') |
352 """Extracts a list of subresources in runner output directory. | 349 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
353 | 350 'Distinct resources colliding to WPR commands') |
354 Args: | 351 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
355 runner_output_dir: Path of the runner's output directory. | 352 'Distinct resource requests to WPR') |
356 | |
357 Returns: | |
358 [URLs of sub-resources] | |
359 """ | |
360 trace_path = os.path.join( | |
361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME) | |
362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
363 url_set = set() | |
364 for request_event in _FilterOutDataAndIncompleteRequests( | |
365 trace.request_track.GetEvents()): | |
366 url_set.add(request_event.url) | |
367 logging.info('lists %s resources of %s from %s' % \ | |
368 (len(url_set), trace.url, trace_path)) | |
369 return [url for url in url_set] | |
370 | 353 |
371 | 354 |
372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): | 355 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
373 """Validates a cache archive content. | 356 """Validates a cache archive content. |
374 | 357 |
375 Args: | 358 Args: |
376 cache_build_trace_path: Path of the generated trace at the cache build time. | 359 cache_build_trace_path: Path of the generated trace at the cache build time. |
377 cache_archive_path: Cache archive's path to validate. | 360 cache_archive_path: Cache archive's path to validate. |
| 361 |
| 362 Returns: |
| 363 { |
| 364 'effective_requests': [URLs of all requests], |
| 365 'effective_post_requests': [URLs of POST requests], |
| 366 'expected_cached_resources': [URLs of resources expected to be cached], |
| 367 'successfully_cached': [URLs of cached sub-resources] |
| 368 } |
378 """ | 369 """ |
379 # TODO(gabadie): What's the best way of propagating errors happening in here? | 370 # TODO(gabadie): What's the best way of propagating errors happening in here? |
380 logging.info('lists cached urls from %s' % cache_archive_path) | 371 logging.info('lists cached urls from %s' % cache_archive_path) |
381 with common_util.TemporaryDirectory() as cache_directory: | 372 with common_util.TemporaryDirectory() as cache_directory: |
382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 373 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
383 cache_keys = set( | 374 cache_keys = set( |
384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 375 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 376 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 377 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 378 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
(...skipping 10 matching lines...)
398 expected_cached_requests = effective_requests.difference( | 389 expected_cached_requests = effective_requests.difference( |
399 effective_post_requests) | 390 effective_post_requests) |
400 effective_cache_keys = cache_keys.difference( | 391 effective_cache_keys = cache_keys.difference( |
401 upload_data_stream_cache_entry_keys) | 392 upload_data_stream_cache_entry_keys) |
402 | 393 |
403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, | 394 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
404 'POST resources') | 395 'POST resources') |
405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, | 396 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
406 'Cached resources') | 397 'Cached resources') |
407 | 398 |
| 399 return { |
| 400 'effective_requests': [url for url in effective_requests], |
| 401 'effective_post_requests': [url for url in effective_post_requests], |
| 402 'expected_cached_resources': [url for url in expected_cached_requests], |
| 403 'successfully_cached_resources': [url for url in effective_cache_keys] |
| 404 } |
| 405 |
| 406 |
| 407 def _ProcessRunOutputDir( |
| 408 cache_validation_result, benchmark_setup, runner_output_dir): |
| 409 """Process benchmark's run output directory. |
| 410 |
| 411 Args: |
| 412 cache_validation_result: Same as for _RunOutputVerifier |
| 413 benchmark_setup: Same as for _RunOutputVerifier |
| 414 runner_output_dir: Same as for SandwichRunner.output_dir |
| 415 |
| 416 Returns: |
| 417 List of dictionaries. |
| 418 """ |
| 419 run_metrics_list = [] |
| 420 run_output_verifier = _RunOutputVerifier( |
| 421 cache_validation_result, benchmark_setup) |
| 422 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( |
| 423 runner_output_dir): |
| 424 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) |
| 425 |
| 426 logging.info('loading trace: %s', trace_path) |
| 427 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
| 428 |
| 429 logging.info('verifying trace: %s', trace_path) |
| 430 run_output_verifier.VerifyTrace(trace) |
| 431 |
| 432 logging.info('extracting metrics from trace: %s', trace_path) |
| 433 run_metrics = { |
| 434 'url': trace.url, |
| 435 'repeat_id': repeat_id, |
| 436 'subresource_discoverer': benchmark_setup['subresource_discoverer'], |
| 437 'cache_recording.subresource_count': |
| 438 len(cache_validation_result['effective_requests']), |
| 439 'cache_recording.cached_subresource_count_theoretic': |
| 440 len(cache_validation_result['successfully_cached_resources']), |
| 441 'cache_recording.cached_subresource_count': |
| 442 len(cache_validation_result['expected_cached_resources']), |
| 443 'benchmark.subresource_count': len(_ListUrlRequests( |
| 444 trace, _RequestOutcome.All)), |
| 445 'benchmark.served_from_cache_count_theoretic': |
| 446 len(benchmark_setup['cache_whitelist']), |
| 447 'benchmark.served_from_cache_count': len(_ListUrlRequests( |
| 448 trace, _RequestOutcome.ServedFromCache)), |
| 449 } |
| 450 run_metrics.update( |
| 451 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( |
| 452 repeat_dir, trace)) |
| 453 run_metrics_list.append(run_metrics) |
| 454 run_metrics_list.sort(key=lambda e: e['repeat_id']) |
| 455 |
| 456 wpr_log_path = os.path.join( |
| 457 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) |
| 458 logging.info('verifying wpr log: %s', wpr_log_path) |
| 459 run_output_verifier.VerifyWprLog(wpr_log_path) |
| 460 return run_metrics_list |
| 461 |
408 | 462 |
409 class PrefetchBenchmarkBuilder(task_manager.Builder): | 463 class PrefetchBenchmarkBuilder(task_manager.Builder): |
410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" | 464 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
411 | 465 |
412 def __init__(self, common_builder): | 466 def __init__(self, common_builder): |
413 task_manager.Builder.__init__(self, | 467 task_manager.Builder.__init__(self, |
414 common_builder.output_directory, | 468 common_builder.output_directory, |
415 common_builder.output_subdirectory) | 469 common_builder.output_subdirectory) |
416 self._common_builder = common_builder | 470 self._common_builder = common_builder |
417 | 471 |
418 self._patched_wpr_task = None | 472 self._wpr_archive_path = None |
419 self._reference_cache_task = None | 473 self._cache_path = None |
420 self._trace_from_grabbing_reference_cache = None | 474 self._trace_from_grabbing_reference_cache = None |
421 self._subresources_for_urls_task = None | 475 self._cache_validation_task = None |
422 self._PopulateCommonPipelines() | 476 self._PopulateCommonPipelines() |
423 | 477 |
424 def _PopulateCommonPipelines(self): | 478 def _PopulateCommonPipelines(self): |
425 """Creates necessary tasks to produce initial cache archive. | 479 """Creates necessary tasks to produce initial cache archive. |
426 | 480 |
427 Also creates a task for producing a json file with a mapping of URLs to | 481 Also creates a task for producing a json file with a mapping of URLs to |
428 subresources (urls-resources.json). | 482 subresources (urls-resources.json). |
429 | 483 |
430 Here is the full dependency tree for the returned task: | 484 Here is the full dependency tree for the returned task: |
431 common/patched-cache-validation.log | 485 common/patched-cache-validation.json |
432 depends on: common/patched-cache.zip | 486 depends on: common/patched-cache.zip |
433 depends on: common/original-cache.zip | 487 depends on: common/original-cache.zip |
434 depends on: common/webpages-patched.wpr | 488 depends on: common/webpages-patched.wpr |
435 depends on: common/webpages.wpr | 489 depends on: common/webpages.wpr |
436 depends on: common/urls-resources.json | |
437 depends on: common/original-cache.zip | |
438 """ | 490 """ |
439 @self.RegisterTask('common/webpages-patched.wpr', | 491 @self.RegisterTask('common/webpages-patched.wpr', |
440 dependencies=[self._common_builder.original_wpr_task]) | 492 dependencies=[self._common_builder.original_wpr_task]) |
441 def BuildPatchedWpr(): | 493 def BuildPatchedWpr(): |
442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 494 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
443 shutil.copyfile( | 495 shutil.copyfile( |
444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 496 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
445 _PatchWpr(BuildPatchedWpr.path) | 497 _PatchWpr(BuildPatchedWpr.path) |
446 | 498 |
447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 499 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
448 def BuildOriginalCache(): | 500 def BuildOriginalCache(): |
449 runner = self._common_builder.CreateSandwichRunner() | 501 runner = self._common_builder.CreateSandwichRunner() |
450 runner.wpr_archive_path = BuildPatchedWpr.path | 502 runner.wpr_archive_path = BuildPatchedWpr.path |
451 runner.cache_archive_path = BuildOriginalCache.path | 503 runner.cache_archive_path = BuildOriginalCache.path |
452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE | 504 runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
453 runner.output_dir = BuildOriginalCache.run_path | 505 runner.output_dir = BuildOriginalCache.run_path |
454 runner.Run() | 506 runner.Run() |
455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' | 507 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
456 original_cache_trace_path = os.path.join( | 508 original_cache_trace_path = os.path.join( |
457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) | 509 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
458 | 510 |
459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) | 511 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
460 def BuildPatchedCache(): | 512 def BuildPatchedCache(): |
461 _PatchCacheArchive(BuildOriginalCache.path, | 513 _PatchCacheArchive(BuildOriginalCache.path, |
462 original_cache_trace_path, BuildPatchedCache.path) | 514 original_cache_trace_path, BuildPatchedCache.path) |
463 | 515 |
464 @self.RegisterTask('common/subresources-for-urls.json', | 516 @self.RegisterTask('common/patched-cache-validation.json', |
465 [BuildOriginalCache]) | |
466 def ListUrlsResources(): | |
467 url_resources = _ReadSubresourceFromRunnerOutputDir( | |
468 BuildOriginalCache.run_path) | |
469 with open(ListUrlsResources.path, 'w') as output: | |
470 json.dump(url_resources, output) | |
471 | |
472 @self.RegisterTask('common/patched-cache-validation.log', | |
473 [BuildPatchedCache]) | 517 [BuildPatchedCache]) |
474 def ValidatePatchedCache(): | 518 def ValidatePatchedCache(): |
475 handler = logging.FileHandler(ValidatePatchedCache.path) | 519 cache_validation_result = _ValidateCacheArchiveContent( |
476 logging.getLogger().addHandler(handler) | 520 original_cache_trace_path, BuildPatchedCache.path) |
477 try: | 521 with open(ValidatePatchedCache.path, 'w') as output: |
478 _ValidateCacheArchiveContent( | 522 json.dump(cache_validation_result, output) |
479 original_cache_trace_path, BuildPatchedCache.path) | |
480 finally: | |
481 logging.getLogger().removeHandler(handler) | |
482 | 523 |
483 self._patched_wpr_task = BuildPatchedWpr | 524 self._wpr_archive_path = BuildPatchedWpr.path |
484 self._trace_from_grabbing_reference_cache = original_cache_trace_path | 525 self._trace_from_grabbing_reference_cache = original_cache_trace_path |
485 self._reference_cache_task = BuildPatchedCache | 526 self._cache_path = BuildPatchedCache.path |
486 self._subresources_for_urls_task = ListUrlsResources | 527 self._cache_validation_task = ValidatePatchedCache |
487 | 528 |
488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) | 529 self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
489 | 530 |
490 def PopulateLoadBenchmark(self, subresource_discoverer, | 531 def PopulateLoadBenchmark(self, subresource_discoverer, |
491 transformer_list_name, transformer_list): | 532 transformer_list_name, transformer_list): |
492 """Populate benchmarking tasks from its setup tasks. | 533 """Populate benchmarking tasks from its setup tasks. |
493 | 534 |
494 Args: | 535 Args: |
495 subresource_discoverer: Name of a subresources discoverer. | 536 subresource_discoverer: Name of a subresources discoverer. |
496 transformer_list_name: A string describing the transformers, will be used | 537 transformer_list_name: A string describing the transformers, will be used |
497 in Task names (prefer names without spaces and special characters). | 538 in Task names (prefer names without spaces and special characters). |
498 transformer_list: An ordered list of function that takes an instance of | 539 transformer_list: An ordered list of function that takes an instance of |
499 SandwichRunner as parameter, would be applied immediately before | 540 SandwichRunner as parameter, would be applied immediately before |
500 SandwichRunner.Run() in the given order. | 541 SandwichRunner.Run() in the given order. |
501 | 542 |
502 Here is the full dependency of the added tree for the returned task: | 543 Here is the full dependency of the added tree for the returned task: |
503 <transformer_list_name>/<subresource_discoverer>-metrics.csv | 544 <transformer_list_name>/<subresource_discoverer>-metrics.csv |
504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ | 545 depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
505 depends on: common/<subresource_discoverer>-cache.zip | 546 depends on: common/<subresource_discoverer>-cache.zip |
506 depends on: some tasks saved by PopulateCommonPipelines() | |
507 depends on: common/<subresource_discoverer>-setup.json | 547 depends on: common/<subresource_discoverer>-setup.json |
508 depends on: some tasks saved by PopulateCommonPipelines() | 548 depends on: common/patched-cache-validation.json |
509 """ | 549 """ |
510 additional_column_names = [ | 550 additional_column_names = [ |
511 'url', | 551 'url', |
512 'repeat_id', | 552 'repeat_id', |
513 'subresource_discoverer', | 553 'subresource_discoverer', |
514 'subresource_count', | 554 'cache_recording.subresource_count', |
515 # The amount of subresources detected at SetupBenchmark step. | 555 'cache_recording.cached_subresource_count_theoretic', |
516 'subresource_count_theoretic', | 556 'cache_recording.cached_subresource_count', |
517 # Amount of subresources for caching as suggested by the subresource | 557 'benchmark.subresource_count', |
518 # discoverer. | 558 'benchmark.served_from_cache_count_theoretic', |
519 'cached_subresource_count_theoretic', | 559 'benchmark.served_from_cache_count'] |
520 'cached_subresource_count'] | |
521 | 560 |
522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS | 561 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
523 assert 'common' not in SUBRESOURCE_DISCOVERERS | 562 assert 'common' not in SUBRESOURCE_DISCOVERERS |
524 shared_task_prefix = os.path.join('common', subresource_discoverer) | 563 shared_task_prefix = os.path.join('common', subresource_discoverer) |
525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) | 564 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
526 | 565 |
527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, | 566 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
528 dependencies=[self._subresources_for_urls_task]) | 567 dependencies=[self._cache_validation_task]) |
529 def SetupBenchmark(): | 568 def SetupBenchmark(): |
530 whitelisted_urls = _ExtractDiscoverableUrls( | 569 whitelisted_urls = _ExtractDiscoverableUrls( |
531 self._trace_from_grabbing_reference_cache, subresource_discoverer) | 570 self._trace_from_grabbing_reference_cache, subresource_discoverer) |
532 | 571 |
533 url_resources = json.load(open(self._subresources_for_urls_task.path)) | |
534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) | 572 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
535 with open(SetupBenchmark.path, 'w') as output: | 573 with open(SetupBenchmark.path, 'w') as output: |
536 json.dump({ | 574 json.dump({ |
537 'cache_whitelist': [url for url in whitelisted_urls], | 575 'cache_whitelist': [url for url in whitelisted_urls], |
538 'subresource_discoverer': subresource_discoverer, | 576 'subresource_discoverer': subresource_discoverer, |
539 'url_resources': url_resources, | |
540 }, output) | 577 }, output) |
541 | 578 |
542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, | 579 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
543 dependencies=[ | 580 dependencies=[SetupBenchmark]) |
544 SetupBenchmark, self._reference_cache_task]) | |
545 def BuildBenchmarkCacheArchive(): | 581 def BuildBenchmarkCacheArchive(): |
546 setup = json.load(open(SetupBenchmark.path)) | 582 benchmark_setup = json.load(open(SetupBenchmark.path)) |
547 chrome_cache.ApplyUrlWhitelistToCacheArchive( | 583 chrome_cache.ApplyUrlWhitelistToCacheArchive( |
548 cache_archive_path=self._reference_cache_task.path, | 584 cache_archive_path=self._cache_path, |
549 whitelisted_urls=setup['cache_whitelist'], | 585 whitelisted_urls=benchmark_setup['cache_whitelist'], |
550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) | 586 output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
551 | 587 |
552 @self.RegisterTask(task_prefix + '-run/', | 588 @self.RegisterTask(task_prefix + '-run/', |
553 dependencies=[BuildBenchmarkCacheArchive]) | 589 dependencies=[BuildBenchmarkCacheArchive]) |
554 def RunBenchmark(): | 590 def RunBenchmark(): |
555 runner = self._common_builder.CreateSandwichRunner() | 591 runner = self._common_builder.CreateSandwichRunner() |
556 for transformer in transformer_list: | 592 for transformer in transformer_list: |
557 transformer(runner) | 593 transformer(runner) |
558 runner.wpr_archive_path = self._patched_wpr_task.path | 594 runner.wpr_archive_path = self._wpr_archive_path |
559 runner.wpr_out_log_path = os.path.join( | 595 runner.wpr_out_log_path = os.path.join( |
560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) | 596 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path | 597 runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH | 598 runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
563 runner.output_dir = RunBenchmark.path | 599 runner.output_dir = RunBenchmark.path |
564 runner.Run() | 600 runner.Run() |
565 | 601 |
566 @self.RegisterTask(task_prefix + '-metrics.csv', | 602 @self.RegisterTask(task_prefix + '-metrics.csv', |
567 dependencies=[RunBenchmark]) | 603 dependencies=[RunBenchmark]) |
568 def ExtractMetrics(): | 604 def ProcessRunOutputDir(): |
569 # TODO(gabadie): Performance improvement: load each trace only once and | 605 benchmark_setup = json.load(open(SetupBenchmark.path)) |
570 # use it for validation and extraction of metrics later. | 606 cache_validation_result = json.load( |
571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) | 607 open(self._cache_validation_task.path)) |
572 | 608 |
573 benchmark_setup = json.load(open(SetupBenchmark.path)) | 609 run_metrics_list = _ProcessRunOutputDir( |
574 run_metrics_list = [] | 610 cache_validation_result, benchmark_setup, RunBenchmark.path) |
575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | 611 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
576 RunBenchmark.path): | |
577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
578 logging.info('processing trace: %s', trace_path) | |
579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
580 run_metrics = { | |
581 'url': trace.url, | |
582 'repeat_id': repeat_id, | |
583 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
584 'subresource_count': len(_ListUrlRequests( | |
585 trace, _RequestOutcome.All)), | |
586 'subresource_count_theoretic': | |
587 len(benchmark_setup['url_resources']), | |
588 'cached_subresource_count': len(_ListUrlRequests( | |
589 trace, _RequestOutcome.ServedFromCache)), | |
590 'cached_subresource_count_theoretic': | |
591 len(benchmark_setup['cache_whitelist']), | |
592 } | |
593 run_metrics.update( | |
594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
595 repeat_dir, trace)) | |
596 run_metrics_list.append(run_metrics) | |
597 | |
598 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
599 with open(ExtractMetrics.path, 'w') as csv_file: | |
600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 612 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 613 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
602 writer.writeheader() | 614 writer.writeheader() |
603 for trace_metrics in run_metrics_list: | 615 for trace_metrics in run_metrics_list: |
604 writer.writerow(trace_metrics) | 616 writer.writerow(trace_metrics) |
605 | 617 |
606 self._common_builder.default_final_tasks.append(ExtractMetrics) | 618 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
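Editorial note (not part of the CL above): the verification idea shared by the old _VerifyBenchmarkOutputDirectory and the new _RunOutputVerifier/_ProcessRunOutputDir pipeline is a plain set comparison between the resource URLs recorded when the cache was built and the URLs observed in each benchmark trace, reported through _PrintUrlSetComparison. The sketch below is a minimal, standalone illustration of that pattern; the helper name and sample URLs are hypothetical and are not taken from the Chromium sources.

import logging

def compare_url_sets(ref_url_set, url_set, url_set_name):
    """Logs how an observed URL set diverges from a reference set.

    Plays the role of _PrintUrlSetComparison in the diff above; the names
    used here are illustrative only, not the Chromium helpers themselves.
    """
    missing = ref_url_set - url_set       # expected but never requested
    unexpected = url_set - ref_url_set    # requested but not expected
    if not missing and not unexpected:
        logging.info('%s: no difference (%d urls)', url_set_name, len(url_set))
        return
    for url in sorted(missing):
        logging.warning('%s: missing %s', url_set_name, url)
    for url in sorted(unexpected):
        logging.warning('%s: unexpected %s', url_set_name, url)

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    expected_cached = {'https://example.com/a.js', 'https://example.com/b.css'}
    observed_cached = {'https://example.com/a.js', 'https://example.com/c.png'}
    compare_url_sets(expected_cached, observed_cached, 'Cached resources')

In the CL itself this comparison is applied several times per trace (all, POST, cached and non-cached resources) and again against the WPR log, which is why the real helper takes a set name purely for logging.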