OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import csv | 5 import csv |
pasko 2016/06/03 17:13:46:
Let's add a top-level comment like:
# Implements a
gabadie 2016/06/06 09:43:15: Done.
6 import logging | 6 import logging |
7 import json | 7 import json |
8 import os | 8 import os |
9 import re | 9 import re |
10 import shutil | 10 import shutil |
11 from urlparse import urlparse | 11 from urlparse import urlparse |
12 | 12 |
13 import chrome_cache | 13 import chrome_cache |
14 import common_util | 14 import common_util |
15 import loading_trace | 15 import loading_trace |
(...skipping 175 matching lines...)
191 elif subresource_discoverer == PARSER_DISCOVERER: | 191 elif subresource_discoverer == PARSER_DISCOVERER: |
192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( | 192 discovered_requests = PrefetchSimulationView.ParserDiscoverableRequests( |
193 first_resource_request, dependencies_lens) | 193 first_resource_request, dependencies_lens) |
194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: | 194 elif subresource_discoverer == HTML_PRELOAD_SCANNER_DISCOVERER: |
195 discovered_requests = PrefetchSimulationView.PreloadedRequests( | 195 discovered_requests = PrefetchSimulationView.PreloadedRequests( |
196 first_resource_request, dependencies_lens, trace) | 196 first_resource_request, dependencies_lens, trace) |
197 else: | 197 else: |
198 assert False | 198 assert False |
199 | 199 |
200 whitelisted_urls = set() | 200 whitelisted_urls = set() |
201 logging.info('white-listing %s' % first_resource_request.url) | |
202 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): | 201 for request in _FilterOutDataAndIncompleteRequests(discovered_requests): |
203 logging.info('white-listing %s' % request.url) | 202 logging.debug('white-listing %s', request.url) |
204 whitelisted_urls.add(request.url) | 203 whitelisted_urls.add(request.url) |
204 logging.info('number of white-listed resources: %d', len(whitelisted_urls)) | |
205 return whitelisted_urls | 205 return whitelisted_urls |
206 | 206 |
207 | 207 |
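For orientation, the whitelist extraction above is driven with a trace path and a discoverer name; a minimal usage sketch (the trace path is invented, and `HTML_PRELOAD_SCANNER_DISCOVERER` is one of the `SUBRESOURCE_DISCOVERERS` defined earlier in this file):

```python
# Sketch: build a cache whitelist from the trace recorded while grabbing the
# original cache. The trace path here is hypothetical.
whitelisted_urls = _ExtractDiscoverableUrls(
    '/tmp/original-cache-run/0/trace.json', HTML_PRELOAD_SCANNER_DISCOVERER)
logging.info('number of white-listed resources: %d', len(whitelisted_urls))
```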
208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): | 208 def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name): |
209 """Compare URL sets and log the diffs. | 209 """Compare URL sets and log the diffs. |
210 | 210 |
211 Args: | 211 Args: |
212 ref_url_set: Set of reference urls. | 212 ref_url_set: Set of reference urls. |
213 url_set: Set of urls to compare to the reference. | 213 url_set: Set of urls to compare to the reference. |
214 url_set_name: The set name for logging purposes. | 214 url_set_name: The set name for logging purposes. |
(...skipping 39 matching lines...)
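The body of `_PrintUrlSetComparison` is collapsed by the review viewer; a plausible reconstruction of such a set-diff logger, given only to make the later call sites readable (the log format and messages are assumptions, not the hidden original):

```python
def _PrintUrlSetComparison(ref_url_set, url_set, url_set_name):
  # Assumed implementation: log the URLs missing from, and unexpected in,
  # url_set relative to ref_url_set.
  missing_urls = ref_url_set.difference(url_set)
  unexpected_urls = url_set.difference(ref_url_set)
  if not missing_urls and not unexpected_urls:
    logging.info('%s: no diff (%d urls)', url_set_name, len(url_set))
    return
  for url in missing_urls:
    logging.error('%s: missing %s', url_set_name, url)
  for url in unexpected_urls:
    logging.error('%s: unexpected %s', url_set_name, url)
```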
254 request_event.method.upper().strip() == 'POST'): | 254 request_event.method.upper().strip() == 'POST'): |
255 urls.add(request_event.url) | 255 urls.add(request_event.url) |
256 elif (request_kind == _RequestOutcome.NotServedFromCache and | 256 elif (request_kind == _RequestOutcome.NotServedFromCache and |
257 not request_event.from_disk_cache): | 257 not request_event.from_disk_cache): |
258 urls.add(request_event.url) | 258 urls.add(request_event.url) |
259 elif request_kind == _RequestOutcome.All: | 259 elif request_kind == _RequestOutcome.All: |
260 urls.add(request_event.url) | 260 urls.add(request_event.url) |
261 return urls | 261 return urls |
262 | 262 |
263 | 263 |
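Only the tail of `_ListUrlRequests` survives the collapsed region above; for context, a sketch of the complete filter, assuming `_RequestOutcome` is a simple enum and that the `ServedFromCache` branch mirrors the visible `NotServedFromCache` one:

```python
def _ListUrlRequests(trace, request_kind):
  # Sketch reconstructed from the branches visible in the diff above.
  urls = set()
  for request_event in _FilterOutDataAndIncompleteRequests(
      trace.request_track.GetEvents()):
    if (request_kind == _RequestOutcome.ServedFromCache and
        request_event.from_disk_cache):
      urls.add(request_event.url)
    elif (request_kind == _RequestOutcome.Post and
          request_event.method.upper().strip() == 'POST'):
      urls.add(request_event.url)
    elif (request_kind == _RequestOutcome.NotServedFromCache and
          not request_event.from_disk_cache):
      urls.add(request_event.url)
    elif request_kind == _RequestOutcome.All:
      urls.add(request_event.url)
  return urls
```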
264 def _VerifyBenchmarkOutputDirectory(benchmark_setup_path, | 264 class _RunOutputVerifier(object): |
265 benchmark_output_directory_path): | 265 """Object to verify benchmark run from traces and WPR log stored in the |
266 """Verifies that all run inside the run_output_directory worked as expected. | 266 runner output directory. |
267 """ | |
267 | 268 |
268 Args: | 269 def __init__(self, cache_validation_result, benchmark_setup): |
269 benchmark_setup_path: Path of the JSON of the benchmark setup. | 270 """Constructor. |
270 benchmark_output_directory_path: Path of the benchmark output directory to | |
271 verify. | |
272 """ | |
273 # TODO(gabadie): What's the best way of propagating errors happening in here? | |
274 benchmark_setup = json.load(open(benchmark_setup_path)) | |
275 cache_whitelist = set(benchmark_setup['cache_whitelist']) | |
276 original_requests = set(benchmark_setup['url_resources']) | |
277 original_cached_requests = original_requests.intersection(cache_whitelist) | |
278 original_uncached_requests = original_requests.difference(cache_whitelist) | |
279 all_sent_url_requests = set() | |
280 | 271 |
281 # Verify requests from traces. | 272 Args: |
282 run_id = -1 | 273 cache_validation_result: JSON of the cache validation task. |
283 while True: | 274 benchmark_setup: JSON of the benchmark setup. |
284 run_id += 1 | 275 """ |
285 run_path = os.path.join(benchmark_output_directory_path, str(run_id)) | 276 self._cache_whitelist = set(benchmark_setup['cache_whitelist']) |
286 if not os.path.isdir(run_path): | 277 self._original_requests = set(cache_validation_result['effective_requests']) |
287 break | 278 self._original_post_requests = set( |
288 trace_path = os.path.join(run_path, sandwich_runner.TRACE_FILENAME) | 279 cache_validation_result['effective_post_requests']) |
289 if not os.path.isfile(trace_path): | 280 self._original_cached_requests = self._original_requests.intersection( |
290 logging.error('missing trace %s' % trace_path) | 281 self._cache_whitelist) |
291 continue | 282 self._original_uncached_requests = self._original_requests.difference( |
292 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 283 self._cache_whitelist) |
293 logging.info('verifying %s from %s' % (trace.url, trace_path)) | 284 self._all_sent_url_requests = set() |
294 | 285 |
286 def VerifyTrace(self, trace): | |
287 """Verifies a trace with the cache validation result and the benchmark | |
288 setup. | |
289 """ | |
295 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 290 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
296 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 291 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
297 effective_cached_requests = \ | 292 effective_cached_requests = \ |
298 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) | 293 _ListUrlRequests(trace, _RequestOutcome.ServedFromCache) |
299 effective_uncached_requests = \ | 294 effective_uncached_requests = \ |
300 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) | 295 _ListUrlRequests(trace, _RequestOutcome.NotServedFromCache) |
301 | 296 |
302 missing_requests = original_requests.difference(effective_requests) | 297 missing_requests = self._original_requests.difference(effective_requests) |
303 unexpected_requests = effective_requests.difference(original_requests) | 298 unexpected_requests = effective_requests.difference(self._original_requests) |
304 expected_cached_requests = \ | 299 expected_cached_requests = \ |
305 original_cached_requests.difference(missing_requests) | 300 self._original_cached_requests.difference(missing_requests) |
306 missing_cached_requests = \ | 301 expected_uncached_requests = self._original_uncached_requests.union( |
307 expected_cached_requests.difference(effective_cached_requests) | 302 unexpected_requests).difference(missing_requests) |
308 expected_uncached_requests = original_uncached_requests.union( | |
309 unexpected_requests).union(missing_cached_requests) | |
310 all_sent_url_requests.update(effective_uncached_requests) | |
311 | 303 |
312 # POST requests are known to be unable to use the cache. | 304 # POST requests are known to be unable to use the cache. |
313 expected_cached_requests.difference_update(effective_post_requests) | 305 expected_cached_requests.difference_update(effective_post_requests) |
314 expected_uncached_requests.update(effective_post_requests) | 306 expected_uncached_requests.update(effective_post_requests) |
315 | 307 |
316 _PrintUrlSetComparison(original_requests, effective_requests, | 308 _PrintUrlSetComparison(self._original_requests, effective_requests, |
317 'All resources') | 309 'All resources') |
318 _PrintUrlSetComparison(set(), effective_post_requests, | 310 _PrintUrlSetComparison(set(), effective_post_requests, 'POST resources') |
319 'POST resources') | |
320 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, | 311 _PrintUrlSetComparison(expected_cached_requests, effective_cached_requests, |
321 'Cached resources') | 312 'Cached resources') |
322 _PrintUrlSetComparison(expected_uncached_requests, | 313 _PrintUrlSetComparison(expected_uncached_requests, |
323 effective_uncached_requests, 'Non cached resources') | 314 effective_uncached_requests, 'Non cached resources') |
324 | 315 |
325 # Verify requests from WPR. | 316 self._all_sent_url_requests.update(effective_uncached_requests) |
326 wpr_log_path = os.path.join( | |
327 benchmark_output_directory_path, sandwich_runner.WPR_LOG_FILENAME) | |
328 logging.info('verifying requests from %s' % wpr_log_path) | |
329 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) | |
330 all_wpr_urls = set() | |
331 unserved_wpr_urls = set() | |
332 wpr_command_colliding_urls = set() | |
333 | 317 |
334 for request in all_wpr_requests: | 318 def VerifyWprLog(self, wpr_log_path): |
335 if request.is_wpr_host: | 319 """Verifies WPR log with previously verified traces.""" |
336 continue | 320 all_wpr_requests = wpr_backend.ExtractRequestsFromLog(wpr_log_path) |
337 if urlparse(request.url).path.startswith('/web-page-replay'): | 321 all_wpr_urls = set() |
338 wpr_command_colliding_urls.add(request.url) | 322 unserved_wpr_urls = set() |
339 elif request.is_served is False: | 323 wpr_command_colliding_urls = set() |
340 unserved_wpr_urls.add(request.url) | |
341 all_wpr_urls.add(request.url) | |
342 | 324 |
343 _PrintUrlSetComparison(set(), unserved_wpr_urls, | 325 for request in all_wpr_requests: |
344 'Distinct unserved resources from WPR') | 326 if request.is_wpr_host: |
345 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, | 327 continue |
346 'Distinct resources colliding to WPR commands') | 328 if urlparse(request.url).path.startswith('/web-page-replay'): |
347 _PrintUrlSetComparison(all_wpr_urls, all_sent_url_requests, | 329 wpr_command_colliding_urls.add(request.url) |
348 'Distinct resource requests to WPR') | 330 elif request.is_served is False: |
331 unserved_wpr_urls.add(request.url) | |
332 all_wpr_urls.add(request.url) | |
349 | 333 |
350 | 334 _PrintUrlSetComparison(set(), unserved_wpr_urls, |
351 def _ReadSubresourceFromRunnerOutputDir(runner_output_dir): | 335 'Distinct unserved resources from WPR') |
352 """Extracts a list of subresources in runner output directory. | 336 _PrintUrlSetComparison(set(), wpr_command_colliding_urls, |
353 | 337 'Distinct resources colliding to WPR commands') |
354 Args: | 338 _PrintUrlSetComparison(all_wpr_urls, self._all_sent_url_requests, |
355 runner_output_dir: Path of the runner's output directory. | 339 'Distinct resource requests to WPR') |
356 | |
357 Returns: | |
358 [URLs of sub-resources] | |
359 """ | |
360 trace_path = os.path.join( | |
361 runner_output_dir, '0', sandwich_runner.TRACE_FILENAME) | |
362 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
363 url_set = set() | |
364 for request_event in _FilterOutDataAndIncompleteRequests( | |
365 trace.request_track.GetEvents()): | |
366 url_set.add(request_event.url) | |
367 logging.info('lists %s resources of %s from %s' % \ | |
368 (len(url_set), trace.url, trace_path)) | |
369 return [url for url in url_set] | |
370 | 340 |
371 | 341 |
372 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): | 342 def _ValidateCacheArchiveContent(cache_build_trace_path, cache_archive_path): |
373 """Validates a cache archive content. | 343 """Validates a cache archive content. |
374 | 344 |
375 Args: | 345 Args: |
376 cache_build_trace_path: Path of the generated trace at the cache build time. | 346 cache_build_trace_path: Path of the generated trace at the cache build time. |
377 cache_archive_path: Cache archive's path to validate. | 347 cache_archive_path: Cache archive's path to validate. |
348 | |
349 Returns: | |
350 { | |
351 'effective_requests': [URLs of all requests], | |
352 'effective_post_requests': [URLs of POST requests], | |
353 'expected_cached_resources': [URLs of resources expected to be cached] | |
pasko 2016/06/03 17:13:46: nit: comma at the end
gabadie 2016/06/06 09:43:15: Done.
354 'successfully_cached': [URLs of cached sub-resources] | |
355 } | |
378 """ | 356 """ |
379 # TODO(gabadie): What's the best way of propagating errors happening in here? | 357 # TODO(gabadie): What's the best way of propagating errors happening in here? |
380 logging.info('lists cached urls from %s' % cache_archive_path) | 358 logging.info('lists cached urls from %s' % cache_archive_path) |
381 with common_util.TemporaryDirectory() as cache_directory: | 359 with common_util.TemporaryDirectory() as cache_directory: |
382 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) | 360 chrome_cache.UnzipDirectoryContent(cache_archive_path, cache_directory) |
383 cache_keys = set( | 361 cache_keys = set( |
384 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) | 362 chrome_cache.CacheBackend(cache_directory, 'simple').ListKeys()) |
385 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) | 363 trace = loading_trace.LoadingTrace.FromJsonFile(cache_build_trace_path) |
386 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) | 364 effective_requests = _ListUrlRequests(trace, _RequestOutcome.All) |
387 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) | 365 effective_post_requests = _ListUrlRequests(trace, _RequestOutcome.Post) |
(...skipping 10 matching lines...)
398 expected_cached_requests = effective_requests.difference( | 376 expected_cached_requests = effective_requests.difference( |
399 effective_post_requests) | 377 effective_post_requests) |
400 effective_cache_keys = cache_keys.difference( | 378 effective_cache_keys = cache_keys.difference( |
401 upload_data_stream_cache_entry_keys) | 379 upload_data_stream_cache_entry_keys) |
402 | 380 |
403 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, | 381 _PrintUrlSetComparison(effective_post_requests, upload_data_stream_requests, |
404 'POST resources') | 382 'POST resources') |
405 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, | 383 _PrintUrlSetComparison(expected_cached_requests, effective_cache_keys, |
406 'Cached resources') | 384 'Cached resources') |
407 | 385 |
386 return { | |
387 'effective_requests': [url for url in effective_requests], | |
388 'effective_post_requests': [url for url in effective_post_requests], | |
389 'expected_cached_resources': [url for url in expected_cached_requests], | |
390 'successfully_cached_resources': [url for url in effective_cache_keys] | |
391 } | |
392 | |
393 | |
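The dictionary returned by `_ValidateCacheArchiveContent` is what the `ValidatePatchedCache` task later serializes to `common/patched-cache-validation.json`; a hypothetical instance (URLs invented) just to show the shape:

```python
# Hypothetical content of common/patched-cache-validation.json:
{
  'effective_requests': ['https://example.com/', 'https://example.com/app.js'],
  'effective_post_requests': [],
  'expected_cached_resources': ['https://example.com/app.js'],
  'successfully_cached_resources': ['https://example.com/app.js'],
}
```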
394 def _ProcessRunOutputDir( | |
395 cache_validation_result, benchmark_setup, runner_output_dir): | |
396 """Process benchmark's run output directory. | |
397 | |
398 Args: | |
399 cache_validation_result: Same as for _RunOutputVerifier | |
400 benchmark_setup: Same as for _RunOutputVerifier | |
401 runner_output_dir: Same as for SandwichRunner.output_dir | |
402 | |
403 Returns: | |
404 List of dictionary. | |
405 """ | |
406 run_metrics_list = [] | |
407 run_output_verifier = _RunOutputVerifier( | |
408 cache_validation_result, benchmark_setup) | |
409 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | |
410 runner_output_dir): | |
411 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
412 | |
413 logging.info('loading trace: %s', trace_path) | |
414 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
415 | |
416 logging.info('verifying trace: %s', trace_path) | |
417 run_output_verifier.VerifyTrace(trace) | |
418 | |
419 logging.info('extracting metrics from trace: %s', trace_path) | |
420 run_metrics = { | |
421 'url': trace.url, | |
422 'repeat_id': repeat_id, | |
423 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
424 'cache_recording.subresource_count': | |
425 len(cache_validation_result['effective_requests']), | |
426 'cache_recording.cached_subresource_count_theoretic': | |
427 len(cache_validation_result['successfully_cached_resources']), | |
428 'cache_recording.cached_subresource_count': | |
429 len(cache_validation_result['expected_cached_resources']), | |
430 'benchmark_repeat.subresource_count': len(_ListUrlRequests( | |
431 trace, _RequestOutcome.All)), | |
432 'benchmark_repeat.served_from_cache_count_theoretic': | |
433 len(benchmark_setup['cache_whitelist']), | |
434 'benchmark_repeat.served_from_cache_count': len(_ListUrlRequests( | |
435 trace, _RequestOutcome.ServedFromCache)), | |
436 } | |
437 run_metrics.update( | |
438 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
439 repeat_dir, trace)) | |
440 run_metrics_list.append(run_metrics) | |
441 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
442 | |
443 wpr_log_path = os.path.join( | |
444 runner_output_dir, sandwich_runner.WPR_LOG_FILENAME) | |
445 logging.info('verifying wpr log: %s', wpr_log_path) | |
446 run_output_verifier.VerifyWprLog(wpr_log_path) | |
447 return run_metrics_list | |
448 | |
408 | 449 |
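Outside the task graph, the new `_ProcessRunOutputDir` helper would be exercised roughly as follows (the file and directory names are hypothetical; the real call sits in the `ProcessRunOutputDir` task further down):

```python
# Sketch: verify one run output directory and pull per-repeat metrics.
benchmark_setup = json.load(open('common/parser-setup.json'))
cache_validation_result = json.load(
    open('common/patched-cache-validation.json'))
run_metrics_list = _ProcessRunOutputDir(
    cache_validation_result, benchmark_setup, 'no-tweaks/parser-run/')
for run_metrics in run_metrics_list:
  logging.info('repeat %d: %d requests served from cache',
               run_metrics['repeat_id'],
               run_metrics['benchmark_repeat.served_from_cache_count'])
```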
409 class PrefetchBenchmarkBuilder(task_manager.Builder): | 450 class PrefetchBenchmarkBuilder(task_manager.Builder): |
410 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" | 451 """A builder for a graph of tasks for NoState-Prefetch emulated benchmarks.""" |
411 | 452 |
412 def __init__(self, common_builder): | 453 def __init__(self, common_builder): |
413 task_manager.Builder.__init__(self, | 454 task_manager.Builder.__init__(self, |
414 common_builder.output_directory, | 455 common_builder.output_directory, |
415 common_builder.output_subdirectory) | 456 common_builder.output_subdirectory) |
416 self._common_builder = common_builder | 457 self._common_builder = common_builder |
417 | 458 |
418 self._patched_wpr_task = None | 459 self._patched_wpr_task = None |
419 self._reference_cache_task = None | 460 self._cache_task = None |
420 self._trace_from_grabbing_reference_cache = None | 461 self._trace_from_grabbing_reference_cache = None |
421 self._subresources_for_urls_task = None | 462 self._cache_validation_task = None |
pasko 2016/06/03 17:13:46: I would prefer to just keep a path here than the w
gabadie 2016/06/06 09:43:14: Done.
422 self._PopulateCommonPipelines() | 463 self._PopulateCommonPipelines() |
423 | 464 |
424 def _PopulateCommonPipelines(self): | 465 def _PopulateCommonPipelines(self): |
425 """Creates necessary tasks to produce initial cache archive. | 466 """Creates necessary tasks to produce initial cache archive. |
426 | 467 |
427 Also creates a task for producing a json file with a mapping of URLs to | 468 Also creates a task for producing a json file with a mapping of URLs to |
428 subresources (urls-resources.json). | 469 subresources (urls-resources.json). |
429 | 470 |
430 Here is the full dependency tree for the returned task: | 471 Here is the full dependency tree for the returned task: |
431 common/patched-cache-validation.log | 472 common/patched-cache-validation.json |
pasko 2016/06/03 17:13:46: probably should rename later to something like pat
gabadie 2016/06/06 09:43:14: Ok will make it in a separate CL.
432 depends on: common/patched-cache.zip | 473 depends on: common/patched-cache.zip |
433 depends on: common/original-cache.zip | 474 depends on: common/original-cache.zip |
434 depends on: common/webpages-patched.wpr | 475 depends on: common/webpages-patched.wpr |
435 depends on: common/webpages.wpr | 476 depends on: common/webpages.wpr |
436 depends on: common/urls-resources.json | |
437 depends on: common/original-cache.zip | |
438 """ | 477 """ |
439 @self.RegisterTask('common/webpages-patched.wpr', | 478 @self.RegisterTask('common/webpages-patched.wpr', |
440 dependencies=[self._common_builder.original_wpr_task]) | 479 dependencies=[self._common_builder.original_wpr_task]) |
441 def BuildPatchedWpr(): | 480 def BuildPatchedWpr(): |
442 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 481 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
443 shutil.copyfile( | 482 shutil.copyfile( |
444 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 483 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
445 _PatchWpr(BuildPatchedWpr.path) | 484 _PatchWpr(BuildPatchedWpr.path) |
446 | 485 |
447 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 486 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
448 def BuildOriginalCache(): | 487 def BuildOriginalCache(): |
449 runner = self._common_builder.CreateSandwichRunner() | 488 runner = self._common_builder.CreateSandwichRunner() |
450 runner.wpr_archive_path = BuildPatchedWpr.path | 489 runner.wpr_archive_path = BuildPatchedWpr.path |
451 runner.cache_archive_path = BuildOriginalCache.path | 490 runner.cache_archive_path = BuildOriginalCache.path |
452 runner.cache_operation = sandwich_runner.CacheOperation.SAVE | 491 runner.cache_operation = sandwich_runner.CacheOperation.SAVE |
453 runner.output_dir = BuildOriginalCache.run_path | 492 runner.output_dir = BuildOriginalCache.run_path |
454 runner.Run() | 493 runner.Run() |
455 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' | 494 BuildOriginalCache.run_path = BuildOriginalCache.path[:-4] + '-run' |
456 original_cache_trace_path = os.path.join( | 495 original_cache_trace_path = os.path.join( |
457 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) | 496 BuildOriginalCache.run_path, '0', sandwich_runner.TRACE_FILENAME) |
458 | 497 |
459 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) | 498 @self.RegisterTask('common/patched-cache.zip', [BuildOriginalCache]) |
460 def BuildPatchedCache(): | 499 def BuildPatchedCache(): |
461 _PatchCacheArchive(BuildOriginalCache.path, | 500 _PatchCacheArchive(BuildOriginalCache.path, |
462 original_cache_trace_path, BuildPatchedCache.path) | 501 original_cache_trace_path, BuildPatchedCache.path) |
463 | 502 |
464 @self.RegisterTask('common/subresources-for-urls.json', | 503 @self.RegisterTask('common/patched-cache-validation.json', |
465 [BuildOriginalCache]) | |
466 def ListUrlsResources(): | |
467 url_resources = _ReadSubresourceFromRunnerOutputDir( | |
468 BuildOriginalCache.run_path) | |
469 with open(ListUrlsResources.path, 'w') as output: | |
470 json.dump(url_resources, output) | |
471 | |
472 @self.RegisterTask('common/patched-cache-validation.log', | |
473 [BuildPatchedCache]) | 504 [BuildPatchedCache]) |
474 def ValidatePatchedCache(): | 505 def ValidatePatchedCache(): |
475 handler = logging.FileHandler(ValidatePatchedCache.path) | 506 cache_validation_result = _ValidateCacheArchiveContent( |
476 logging.getLogger().addHandler(handler) | 507 original_cache_trace_path, BuildPatchedCache.path) |
477 try: | 508 with open(ValidatePatchedCache.path, 'w') as output: |
478 _ValidateCacheArchiveContent( | 509 json.dump(cache_validation_result, output) |
479 original_cache_trace_path, BuildPatchedCache.path) | |
480 finally: | |
481 logging.getLogger().removeHandler(handler) | |
482 | 510 |
483 self._patched_wpr_task = BuildPatchedWpr | 511 self._patched_wpr_task = BuildPatchedWpr |
484 self._trace_from_grabbing_reference_cache = original_cache_trace_path | 512 self._trace_from_grabbing_reference_cache = original_cache_trace_path |
485 self._reference_cache_task = BuildPatchedCache | 513 self._cache_task = BuildPatchedCache |
486 self._subresources_for_urls_task = ListUrlsResources | 514 self._cache_validation_task = ValidatePatchedCache |
487 | 515 |
488 self._common_builder.default_final_tasks.append(ValidatePatchedCache) | 516 self._common_builder.default_final_tasks.append(ValidatePatchedCache) |
489 | 517 |
490 def PopulateLoadBenchmark(self, subresource_discoverer, | 518 def PopulateLoadBenchmark(self, subresource_discoverer, |
491 transformer_list_name, transformer_list): | 519 transformer_list_name, transformer_list): |
492 """Populate benchmarking tasks from its setup tasks. | 520 """Populate benchmarking tasks from its setup tasks. |
493 | 521 |
494 Args: | 522 Args: |
495 subresource_discoverer: Name of a subresources discoverer. | 523 subresource_discoverer: Name of a subresources discoverer. |
496 transformer_list_name: A string describing the transformers, will be used | 524 transformer_list_name: A string describing the transformers, will be used |
497 in Task names (prefer names without spaces and special characters). | 525 in Task names (prefer names without spaces and special characters). |
498 transformer_list: An ordered list of function that takes an instance of | 526 transformer_list: An ordered list of function that takes an instance of |
499 SandwichRunner as parameter, would be applied immediately before | 527 SandwichRunner as parameter, would be applied immediately before |
500 SandwichRunner.Run() in the given order. | 528 SandwichRunner.Run() in the given order. |
501 | 529 |
502 Here is the full dependency of the added tree for the returned task: | 530 Here is the full dependency of the added tree for the returned task: |
503 <transformer_list_name>/<subresource_discoverer>-metrics.csv | 531 <transformer_list_name>/<subresource_discoverer>-metrics.csv |
504 depends on: <transformer_list_name>/<subresource_discoverer>-run/ | 532 depends on: <transformer_list_name>/<subresource_discoverer>-run/ |
505 depends on: common/<subresource_discoverer>-cache.zip | 533 depends on: common/<subresource_discoverer>-cache.zip |
506 depends on: some tasks saved by PopulateCommonPipelines() | 534 depends on: some tasks saved by PopulateCommonPipelines() |
pasko 2016/06/03 17:13:46: only depends on -setup.json, right?
gabadie 2016/06/06 09:43:14: Done.
507 depends on: common/<subresource_discoverer>-setup.json | 535 depends on: common/<subresource_discoverer>-setup.json |
508 depends on: some tasks saved by PopulateCommonPipelines() | 536 depends on: some tasks saved by PopulateCommonPipelines() |
pasko 2016/06/03 17:13:46: some? should it just say common/patched-cache-vali
gabadie 2016/06/06 09:43:14: Done.
509 """ | 537 """ |
510 additional_column_names = [ | 538 additional_column_names = [ |
511 'url', | 539 'url', |
512 'repeat_id', | 540 'repeat_id', |
513 'subresource_discoverer', | 541 'subresource_discoverer', |
514 'subresource_count', | 542 'cache_recording.subresource_count', |
515 # The amount of subresources detected at SetupBenchmark step. | 543 'cache_recording.cached_subresource_count_theoretic', |
516 'subresource_count_theoretic', | 544 'cache_recording.cached_subresource_count', |
517 # Amount of subresources for caching as suggested by the subresource | 545 'benchmark_repeat.subresource_count', |
pasko 2016/06/03 17:13:46: 'benchmark_repeat' sounds more like a command than
gabadie 2016/06/06 09:43:15: Done.
518 # discoverer. | 546 'benchmark_repeat.served_from_cache_count_theoretic', |
519 'cached_subresource_count_theoretic', | 547 'benchmark_repeat.served_from_cache_count'] |
520 'cached_subresource_count'] | |
521 | 548 |
522 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS | 549 assert subresource_discoverer in SUBRESOURCE_DISCOVERERS |
523 assert 'common' not in SUBRESOURCE_DISCOVERERS | 550 assert 'common' not in SUBRESOURCE_DISCOVERERS |
524 shared_task_prefix = os.path.join('common', subresource_discoverer) | 551 shared_task_prefix = os.path.join('common', subresource_discoverer) |
525 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) | 552 task_prefix = os.path.join(transformer_list_name, subresource_discoverer) |
526 | 553 |
527 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, | 554 @self.RegisterTask(shared_task_prefix + '-setup.json', merge=True, |
528 dependencies=[self._subresources_for_urls_task]) | 555 dependencies=[self._cache_validation_task]) |
529 def SetupBenchmark(): | 556 def SetupBenchmark(): |
530 whitelisted_urls = _ExtractDiscoverableUrls( | 557 whitelisted_urls = _ExtractDiscoverableUrls( |
531 self._trace_from_grabbing_reference_cache, subresource_discoverer) | 558 self._trace_from_grabbing_reference_cache, subresource_discoverer) |
532 | 559 |
533 url_resources = json.load(open(self._subresources_for_urls_task.path)) | |
534 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) | 560 common_util.EnsureParentDirectoryExists(SetupBenchmark.path) |
535 with open(SetupBenchmark.path, 'w') as output: | 561 with open(SetupBenchmark.path, 'w') as output: |
536 json.dump({ | 562 json.dump({ |
537 'cache_whitelist': [url for url in whitelisted_urls], | 563 'cache_whitelist': [url for url in whitelisted_urls], |
538 'subresource_discoverer': subresource_discoverer, | 564 'subresource_discoverer': subresource_discoverer, |
539 'url_resources': url_resources, | |
540 }, output) | 565 }, output) |
541 | 566 |
542 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, | 567 @self.RegisterTask(shared_task_prefix + '-cache.zip', merge=True, |
543 dependencies=[ | 568 dependencies=[SetupBenchmark]) |
544 SetupBenchmark, self._reference_cache_task]) | |
545 def BuildBenchmarkCacheArchive(): | 569 def BuildBenchmarkCacheArchive(): |
546 setup = json.load(open(SetupBenchmark.path)) | 570 benchmark_setup = json.load(open(SetupBenchmark.path)) |
547 chrome_cache.ApplyUrlWhitelistToCacheArchive( | 571 chrome_cache.ApplyUrlWhitelistToCacheArchive( |
548 cache_archive_path=self._reference_cache_task.path, | 572 cache_archive_path=self._cache_task.path, |
549 whitelisted_urls=setup['cache_whitelist'], | 573 whitelisted_urls=benchmark_setup['cache_whitelist'], |
550 output_cache_archive_path=BuildBenchmarkCacheArchive.path) | 574 output_cache_archive_path=BuildBenchmarkCacheArchive.path) |
551 | 575 |
552 @self.RegisterTask(task_prefix + '-run/', | 576 @self.RegisterTask(task_prefix + '-run/', |
553 dependencies=[BuildBenchmarkCacheArchive]) | 577 dependencies=[BuildBenchmarkCacheArchive]) |
554 def RunBenchmark(): | 578 def RunBenchmark(): |
555 runner = self._common_builder.CreateSandwichRunner() | 579 runner = self._common_builder.CreateSandwichRunner() |
556 for transformer in transformer_list: | 580 for transformer in transformer_list: |
557 transformer(runner) | 581 transformer(runner) |
558 runner.wpr_archive_path = self._patched_wpr_task.path | 582 runner.wpr_archive_path = self._patched_wpr_task.path |
559 runner.wpr_out_log_path = os.path.join( | 583 runner.wpr_out_log_path = os.path.join( |
560 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) | 584 RunBenchmark.path, sandwich_runner.WPR_LOG_FILENAME) |
561 runner.cache_archive_path = BuildBenchmarkCacheArchive.path | 585 runner.cache_archive_path = BuildBenchmarkCacheArchive.path |
562 runner.cache_operation = sandwich_runner.CacheOperation.PUSH | 586 runner.cache_operation = sandwich_runner.CacheOperation.PUSH |
563 runner.output_dir = RunBenchmark.path | 587 runner.output_dir = RunBenchmark.path |
564 runner.Run() | 588 runner.Run() |
565 | 589 |
566 @self.RegisterTask(task_prefix + '-metrics.csv', | 590 @self.RegisterTask(task_prefix + '-metrics.csv', |
567 dependencies=[RunBenchmark]) | 591 dependencies=[RunBenchmark]) |
568 def ExtractMetrics(): | 592 def ProcessRunOutputDir(): |
569 # TODO(gabadie): Performance improvement: load each trace only once and | 593 benchmark_setup = json.load(open(SetupBenchmark.path)) |
570 # use it for validation and extraction of metrics later. | 594 cache_validation_result = json.load( |
571 _VerifyBenchmarkOutputDirectory(SetupBenchmark.path, RunBenchmark.path) | 595 open(self._cache_validation_task.path)) |
572 | 596 |
573 benchmark_setup = json.load(open(SetupBenchmark.path)) | 597 run_metrics_list = _ProcessRunOutputDir( |
574 run_metrics_list = [] | 598 cache_validation_result, benchmark_setup, RunBenchmark.path) |
575 for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns( | 599 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
576 RunBenchmark.path): | |
577 trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME) | |
578 logging.info('processing trace: %s', trace_path) | |
579 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | |
580 run_metrics = { | |
581 'url': trace.url, | |
582 'repeat_id': repeat_id, | |
583 'subresource_discoverer': benchmark_setup['subresource_discoverer'], | |
584 'subresource_count': len(_ListUrlRequests( | |
585 trace, _RequestOutcome.All)), | |
586 'subresource_count_theoretic': | |
587 len(benchmark_setup['url_resources']), | |
588 'cached_subresource_count': len(_ListUrlRequests( | |
589 trace, _RequestOutcome.ServedFromCache)), | |
590 'cached_subresource_count_theoretic': | |
591 len(benchmark_setup['cache_whitelist']), | |
592 } | |
593 run_metrics.update( | |
594 sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory( | |
595 repeat_dir, trace)) | |
596 run_metrics_list.append(run_metrics) | |
597 | |
598 run_metrics_list.sort(key=lambda e: e['repeat_id']) | |
599 with open(ExtractMetrics.path, 'w') as csv_file: | |
600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 600 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 601 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
602 writer.writeheader() | 602 writer.writeheader() |
603 for trace_metrics in run_metrics_list: | 603 for trace_metrics in run_metrics_list: |
604 writer.writerow(trace_metrics) | 604 writer.writerow(trace_metrics) |
605 | 605 |
606 self._common_builder.default_final_tasks.append(ExtractMetrics) | 606 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
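Taken together, a client of this module wires the builder up roughly like this (the `common_builder` instance comes from the surrounding sandwich task framework, and the transformer list name is invented):

```python
# Sketch: build the prefetch benchmark task graph for one discoverer.
builder = PrefetchBenchmarkBuilder(common_builder)
builder.PopulateLoadBenchmark(
    subresource_discoverer=PARSER_DISCOVERER,
    transformer_list_name='no-network-emulation',
    transformer_list=[])
```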