OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """ | 5 """ |
6 Implements a task builder for benchmarking the effects of NoState Prefetch. | 6 Implements a task builder for benchmarking the effects of NoState Prefetch. |
7 Notable steps of the task pipeline: | 7 Notable steps of the task pipeline: |
8 * Save a WPR archive | 8 * Save a WPR archive |
9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
(...skipping 12 matching lines...) | |
23 import shutil | 23 import shutil |
24 from urlparse import urlparse | 24 from urlparse import urlparse |
25 | 25 |
26 import chrome_cache | 26 import chrome_cache |
27 import common_util | 27 import common_util |
28 import loading_trace | 28 import loading_trace |
29 from prefetch_view import PrefetchSimulationView | 29 from prefetch_view import PrefetchSimulationView |
30 from request_dependencies_lens import RequestDependencyLens | 30 from request_dependencies_lens import RequestDependencyLens |
31 import sandwich_metrics | 31 import sandwich_metrics |
32 import sandwich_runner | 32 import sandwich_runner |
33 import sandwich_utils | |
33 import task_manager | 34 import task_manager |
34 import wpr_backend | 35 import wpr_backend |
35 | 36 |
36 | 37 |
37 class Discoverer(object): | 38 class Discoverer(object): |
38 # Do not prefetch anything. | 39 # Do not prefetch anything. |
39 EmptyCache = 'empty-cache' | 40 EmptyCache = 'empty-cache' |
40 | 41 |
41 # Prefetches everything to load fully from cache (impossible in practice). | 42 # Prefetches everything to load fully from cache (impossible in practice). |
42 FullCache = 'full-cache' | 43 FullCache = 'full-cache' |
(...skipping 149 matching lines...) | |
192 logging.info('number of requests discovered by %s: %d', | 193 logging.info('number of requests discovered by %s: %d', |
193 subresource_discoverer, len(requests)) | 194 subresource_discoverer, len(requests)) |
194 return requests | 195 return requests |
195 | 196 |
196 | 197 |
197 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): | 198 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): |
198 with open(original_headers_path) as file_input: | 199 with open(original_headers_path) as file_input: |
199 original_headers = json.load(file_input) | 200 original_headers = json.load(file_input) |
200 pruned_requests = set() | 201 pruned_requests = set() |
201 for request in requests: | 202 for request in requests: |
202 request_original_headers = original_headers[request.url] | 203 url = sandwich_utils.NormalizeUrl(request.url) |
204 if url not in original_headers: | |
205 # TODO(gabadie): Dig why these requests were not in WPR. | |
pasko
2016/07/04 15:55:19
nit: s/Dig/Investigate/
yeah, that's a bit surprising
gabadie
2016/07/04 17:03:43
Done.
| |
206 assert request.failed | |
207 logging.warning( | |
208 'could not find original headers for: %s (failure: %s)', | |
209 url, request.error_text) | |
210 continue | |
211 request_original_headers = original_headers[url] | |
203 if ('cache-control' in request_original_headers and | 212 if ('cache-control' in request_original_headers and |
204 'no-store' in request_original_headers['cache-control'].lower()): | 213 'no-store' in request_original_headers['cache-control'].lower()): |
205 pruned_requests.add(request) | 214 pruned_requests.add(request) |
206 return [r for r in requests if r not in pruned_requests] | 215 return [r for r in requests if r not in pruned_requests] |
207 | 216 |
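sandwich_utils.NormalizeUrl is defined outside this file and its implementation is not visible in this CL; from its use above, it canonicalizes request URLs so they match the keys of the original-headers JSON dumped from the WPR archive. A minimal sketch of what such a helper might look like, assuming the archive keys entries by fragment-less URL (an assumption, not the actual sandwich_utils code):

    import urlparse  # Python 2 module, matching this codebase

    def NormalizeUrl(url):
      # Hypothetical sketch: drop the fragment so the request URL matches
      # the URL the WPR archive recorded for this resource.
      parsed = urlparse.urlparse(url)
      return urlparse.urlunparse(
          (parsed.scheme, parsed.netloc, parsed.path, parsed.params,
           parsed.query, ''))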
208 | 217 |
209 def _ExtractDiscoverableUrls( | 218 def _ExtractDiscoverableUrls( |
210 original_headers_path, loading_trace_path, subresource_discoverer): | 219 original_headers_path, loading_trace_path, subresource_discoverer): |
211 """Extracts discoverable resource urls from a loading trace according to a | 220 """Extracts discoverable resource urls from a loading trace according to a |
212 sub-resource discoverer. | 221 sub-resource discoverer. |
(...skipping 259 matching lines...) | |
472 logging.info('loading trace: %s', trace_path) | 481 logging.info('loading trace: %s', trace_path) |
473 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 482 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
474 | 483 |
475 logging.info('verifying trace: %s', trace_path) | 484 logging.info('verifying trace: %s', trace_path) |
476 run_output_verifier.VerifyTrace(trace) | 485 run_output_verifier.VerifyTrace(trace) |
477 | 486 |
478 logging.info('extracting metrics from trace: %s', trace_path) | 487 logging.info('extracting metrics from trace: %s', trace_path) |
479 served_from_network_bytes = 0 | 488 served_from_network_bytes = 0 |
480 served_from_cache_bytes = 0 | 489 served_from_cache_bytes = 0 |
481 urls_hitting_network = set() | 490 urls_hitting_network = set() |
491 response_sizes = {} | |
482 for request in _FilterOutDataAndIncompleteRequests( | 492 for request in _FilterOutDataAndIncompleteRequests( |
483 trace.request_track.GetEvents()): | 493 trace.request_track.GetEvents()): |
484 # Ignore requests served from Blink's cache. | 494 # Ignore requests served from Blink's cache. |
485 if request.served_from_cache: | 495 if request.served_from_cache: |
486 continue | 496 continue |
487 urls_hitting_network.add(request.url) | 497 urls_hitting_network.add(request.url) |
488 if request.from_disk_cache: | 498 if request.from_disk_cache: |
489 served_from_cache_bytes += cached_encoded_data_lengths[request.url] | 499 if request.url in cached_encoded_data_lengths: |
500 response_size = cached_encoded_data_lengths[request.url] | |
501 else: | |
502 # Some fat web pages may overflow the memory cache, and so some | |
503 # requests might be served from the disk cache a couple of times per | |
504 # page load. | |
pasko
2016/07/04 15:55:19
can you spit a log message here? It would be nice
gabadie
2016/07/04 17:03:43
Done.
| |
505 response_size = response_sizes[request.url] | |
506 served_from_cache_bytes += response_size | |
490 else: | 507 else: |
491 served_from_network_bytes += request.GetEncodedDataLength() | 508 response_size = request.GetEncodedDataLength() |
509 served_from_network_bytes += response_size | |
510 response_sizes[request.url] = response_size | |
492 | 511 |
493 # Make sure requests served from Blink's cache have at least one | 512 # Make sure requests served from Blink's cache have at least one |
494 # corresponding request that was not served from Blink's cache. | 513 # corresponding request that was not served from Blink's cache. |
495 for request in _FilterOutDataAndIncompleteRequests( | 514 for request in _FilterOutDataAndIncompleteRequests( |
496 trace.request_track.GetEvents()): | 515 trace.request_track.GetEvents()): |
497 assert (request.url in urls_hitting_network or | 516 assert (request.url in urls_hitting_network or |
498 not request.served_from_cache) | 517 not request.served_from_cache) |
499 | 518 |
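Distilled, the byte accounting in the loop above works as follows. This sketch reduces request objects to (url, from_disk_cache, encoded_data_length) tuples and assumes requests served from Blink's cache have already been filtered out, as above; it is only an illustration of the logic, not code from this CL:

    def TallyServedBytes(requests, cached_encoded_data_lengths):
      served_from_network_bytes = 0
      served_from_cache_bytes = 0
      response_sizes = {}
      for url, from_disk_cache, encoded_data_length in requests:
        if from_disk_cache:
          if url in cached_encoded_data_lengths:
            size = cached_encoded_data_lengths[url]
          else:
            # Memory cache overflow: this URL was served from the disk
            # cache more than once, so reuse the size recorded for it
            # earlier in the same page load.
            size = response_sizes[url]
          served_from_cache_bytes += size
        else:
          size = encoded_data_length
          served_from_network_bytes += size
        response_sizes[url] = size
      return served_from_network_bytes, served_from_cache_bytes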
500 run_metrics = { | 519 run_metrics = { |
501 'url': trace.url, | 520 'url': trace.url, |
(...skipping 63 matching lines...) | |
565 dependencies=[self._common_builder.original_wpr_task]) | 584 dependencies=[self._common_builder.original_wpr_task]) |
566 def BuildPatchedWpr(): | 585 def BuildPatchedWpr(): |
567 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 586 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
568 shutil.copyfile( | 587 shutil.copyfile( |
569 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 588 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
570 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) | 589 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) |
571 | 590 |
572 # Save the original response headers. | 591 # Save the original response headers. |
573 original_response_headers = {e.url: e.GetResponseHeadersDict() \ | 592 original_response_headers = {e.url: e.GetResponseHeadersDict() \ |
574 for e in wpr_archive.ListUrlEntries()} | 593 for e in wpr_archive.ListUrlEntries()} |
594 logging.info('saving response headers for %d resources', | |
595 len(original_response_headers)) | |
596 if not original_response_headers: | |
597 # TODO(gabadie): How is it possible to not even have the main resource | |
598 # in the WPR archive? | |
pasko
2016/07/04 15:55:18
please link to the bug (and preferably the comment)
gabadie
2016/07/04 17:03:43
Done.
| |
599 raise sandwich_utils.SandwichKnownError( | |
600 'Looks like no resources were recorded in WPR during: {}'.format( | |
601 self._common_builder.original_wpr_task.name)) | |
575 with open(self._original_headers_path, 'w') as file_output: | 602 with open(self._original_headers_path, 'w') as file_output: |
576 json.dump(original_response_headers, file_output) | 603 json.dump(original_response_headers, file_output) |
577 | 604 |
578 # Patch WPR. | 605 # Patch WPR. |
579 _PatchWpr(wpr_archive) | 606 _PatchWpr(wpr_archive) |
580 wpr_archive.Persist() | 607 wpr_archive.Persist() |
581 | 608 |
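sandwich_utils.SandwichKnownError is likewise defined outside this file and not shown in this CL; presumably it is a dedicated exception type that lets the harness report anticipated failure modes, such as an empty WPR recording, distinctly from unexpected crashes. It could be as simple as the following (an assumption, not the actual definition):

    class SandwichKnownError(Exception):
      # Hypothetical sketch: marker type for known, diagnosable failures
      # that should abort the task with a readable message rather than a
      # raw stack trace.
      pass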
582 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 609 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
583 def BuildOriginalCache(): | 610 def BuildOriginalCache(): |
584 runner = self._common_builder.CreateSandwichRunner() | 611 runner = self._common_builder.CreateSandwichRunner() |
(...skipping 111 matching lines...) | |
696 run_metrics_list = _ProcessRunOutputDir( | 723 run_metrics_list = _ProcessRunOutputDir( |
697 cache_validation_result, benchmark_setup, RunBenchmark.path) | 724 cache_validation_result, benchmark_setup, RunBenchmark.path) |
698 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 725 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
699 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 726 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
700 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 727 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
701 writer.writeheader() | 728 writer.writeheader() |
702 for trace_metrics in run_metrics_list: | 729 for trace_metrics in run_metrics_list: |
703 writer.writerow(trace_metrics) | 730 writer.writerow(trace_metrics) |
704 | 731 |
705 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 732 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |