OLD | NEW |
---|---|
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 """ | 5 """ |
6 Implements a task builder for benchmarking effects of NoState Prefetch. | 6 Implements a task builder for benchmarking effects of NoState Prefetch. |
7 Notable steps of the task pipeline: | 7 Notable steps of the task pipeline: |
8 * Save a WPR archive | 8 * Save a WPR archive |
9 * Process the WPR archive to make all resources cacheable | 9 * Process the WPR archive to make all resources cacheable |
10 * Process cache archive to patch response headers back to their original | 10 * Process cache archive to patch response headers back to their original |
(...skipping 12 matching lines...) | |
23 import shutil | 23 import shutil |
24 from urlparse import urlparse | 24 from urlparse import urlparse |
25 | 25 |
26 import chrome_cache | 26 import chrome_cache |
27 import common_util | 27 import common_util |
28 import loading_trace | 28 import loading_trace |
29 from prefetch_view import PrefetchSimulationView | 29 from prefetch_view import PrefetchSimulationView |
30 from request_dependencies_lens import RequestDependencyLens | 30 from request_dependencies_lens import RequestDependencyLens |
31 import sandwich_metrics | 31 import sandwich_metrics |
32 import sandwich_runner | 32 import sandwich_runner |
33 import sandwich_utils | |
33 import task_manager | 34 import task_manager |
34 import wpr_backend | 35 import wpr_backend |
35 | 36 |
36 | 37 |
37 class Discoverer(object): | 38 class Discoverer(object): |
38 # Do not prefetch anything. | 39 # Do not prefetch anything. |
39 EmptyCache = 'empty-cache' | 40 EmptyCache = 'empty-cache' |
40 | 41 |
41 # Prefetches everything to load fully from cache (impossible in practice). | 42 # Prefetches everything to load fully from cache (impossible in practice). |
42 FullCache = 'full-cache' | 43 FullCache = 'full-cache' |
(...skipping 151 matching lines...) | |
194 logging.info('number of requests discovered by %s: %d', | 195 logging.info('number of requests discovered by %s: %d', |
195 subresource_discoverer, len(requests)) | 196 subresource_discoverer, len(requests)) |
196 return requests | 197 return requests |
197 | 198 |
198 | 199 |
199 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): | 200 def _PruneOutOriginalNoStoreRequests(original_headers_path, requests): |
200 with open(original_headers_path) as file_input: | 201 with open(original_headers_path) as file_input: |
201 original_headers = json.load(file_input) | 202 original_headers = json.load(file_input) |
202 pruned_requests = set() | 203 pruned_requests = set() |
203 for request in requests: | 204 for request in requests: |
204 request_original_headers = original_headers[request.url] | 205 url = sandwich_utils.NormalizeUrl(request.url) |
206 if url not in original_headers: | |
207 # TODO(gabadie): Investigate why these requests were not in WPR. | |
208 assert request.failed | |
209 logging.warning( | |
210 'could not find original headers for: %s (failure: %s)', | |
211 url, request.error_text) | |
212 continue | |
213 request_original_headers = original_headers[url] | |
205 if ('cache-control' in request_original_headers and | 214 if ('cache-control' in request_original_headers and |
206 'no-store' in request_original_headers['cache-control'].lower()): | 215 'no-store' in request_original_headers['cache-control'].lower()): |
207 pruned_requests.add(request) | 216 pruned_requests.add(request) |
208 return [r for r in requests if r not in pruned_requests] | 217 return [r for r in requests if r not in pruned_requests] |
209 | 218 |
210 | 219 |
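For reference, the pruning step above can be read as the self-contained sketch below. `NormalizeUrl` is a stand-in for `sandwich_utils.NormalizeUrl`, assumed here to canonicalize trace URLs (e.g. dropping fragments) so they match the keys saved from the WPR archive; requests whose URL is missing from the saved headers are kept, as in the hunk above.

```python
# Minimal sketch of _PruneOutOriginalNoStoreRequests, assuming request
# objects expose .url and that NormalizeUrl approximates
# sandwich_utils.NormalizeUrl (hypothetical behavior here).
import json
from urlparse import urlparse, urlunparse  # Python 2, as in this module


def NormalizeUrl(url):
  # Hypothetical normalization: drop the fragment so http://a.com/x#frag
  # and http://a.com/x map to the same original-headers key.
  return urlunparse(urlparse(url)._replace(fragment=''))


def PruneOutNoStoreRequests(original_headers_path, requests):
  with open(original_headers_path) as file_input:
    original_headers = json.load(file_input)
  kept = []
  for request in requests:
    headers = original_headers.get(NormalizeUrl(request.url), {})
    # A response originally carrying Cache-Control: no-store must never
    # be served from the cache, so the request is pruned out.
    if 'no-store' not in headers.get('cache-control', '').lower():
      kept.append(request)
  return kept
```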
211 def _ExtractDiscoverableUrls( | 220 def _ExtractDiscoverableUrls( |
212 original_headers_path, loading_trace_path, subresource_discoverer): | 221 original_headers_path, loading_trace_path, subresource_discoverer): |
213 """Extracts discoverable resource urls from a loading trace according to a | 222 """Extracts discoverable resource urls from a loading trace according to a |
214 sub-resource discoverer. | 223 sub-resource discoverer. |
(...skipping 259 matching lines...) | |
474 logging.info('loading trace: %s', trace_path) | 483 logging.info('loading trace: %s', trace_path) |
475 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) | 484 trace = loading_trace.LoadingTrace.FromJsonFile(trace_path) |
476 | 485 |
477 logging.info('verifying trace: %s', trace_path) | 486 logging.info('verifying trace: %s', trace_path) |
478 run_output_verifier.VerifyTrace(trace) | 487 run_output_verifier.VerifyTrace(trace) |
479 | 488 |
480 logging.info('extracting metrics from trace: %s', trace_path) | 489 logging.info('extracting metrics from trace: %s', trace_path) |
481 served_from_network_bytes = 0 | 490 served_from_network_bytes = 0 |
482 served_from_cache_bytes = 0 | 491 served_from_cache_bytes = 0 |
483 urls_hitting_network = set() | 492 urls_hitting_network = set() |
493 response_sizes = {} | |
484 for request in _FilterOutDataAndIncompleteRequests( | 494 for request in _FilterOutDataAndIncompleteRequests( |
485 trace.request_track.GetEvents()): | 495 trace.request_track.GetEvents()): |
486 # Ignore requests served from Blink's cache. | 496 # Ignore requests served from Blink's cache. |
487 if request.served_from_cache: | 497 if request.served_from_cache: |
488 continue | 498 continue |
489 urls_hitting_network.add(request.url) | 499 urls_hitting_network.add(request.url) |
490 if request.from_disk_cache: | 500 if request.from_disk_cache: |
491 served_from_cache_bytes += cached_encoded_data_lengths[request.url] | 501 if request.url in cached_encoded_data_lengths: |
502 response_size = cached_encoded_data_lengths[request.url] | |
503 else: | |
504 # Some fat webpages may overflow the memory cache, so some requests | |
505 # might be served from the disk cache a couple of times per page | |
506 # load. | |
507 logging.warning('Could have been served from memory cache: %s', | |
508 request.url) | |
509 response_size = response_sizes[request.url] | |
510 served_from_cache_bytes += response_size | |
492 else: | 511 else: |
493 served_from_network_bytes += request.GetEncodedDataLength() | 512 response_size = request.GetEncodedDataLength() |
513 served_from_network_bytes += response_size | |
514 response_sizes[request.url] = response_size | |
494 | 515 |
495 # Make sure requests served from Blink's cache have at least one | 516 # Make sure requests served from Blink's cache have at least one |
496 # corresponding request that was not served from Blink's cache. | 517 # corresponding request that was not served from Blink's cache. |
497 for request in _FilterOutDataAndIncompleteRequests( | 518 for request in _FilterOutDataAndIncompleteRequests( |
498 trace.request_track.GetEvents()): | 519 trace.request_track.GetEvents()): |
499 assert (request.url in urls_hitting_network or | 520 assert (request.url in urls_hitting_network or |
500 not request.served_from_cache) | 521 not request.served_from_cache) |
501 | 522 |
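The byte accounting in this hunk boils down to the pattern sketched below, assuming request objects carry `.url`, `.served_from_cache`, `.from_disk_cache` and `.GetEncodedDataLength()` as the request track events in this file do. Note the sketch defaults to 0 where the module above would raise a KeyError for an unseen URL.

```python
def AccountTransferBytes(requests, cached_encoded_data_lengths):
  """Sketch of the served-from-network/cache byte accounting above."""
  served_from_network_bytes = 0
  served_from_cache_bytes = 0
  response_sizes = {}  # url -> encoded size seen earlier in this trace
  for request in requests:
    if request.served_from_cache:
      continue  # Blink's in-memory cache: no disk or network bytes moved.
    if request.from_disk_cache:
      # Prefer the size recorded when the cache archive was built; if the
      # memory cache overflowed, the same URL may hit the disk cache again
      # later in the load, so fall back to the size seen earlier.
      served_from_cache_bytes += cached_encoded_data_lengths.get(
          request.url, response_sizes.get(request.url, 0))
    else:
      size = request.GetEncodedDataLength()
      served_from_network_bytes += size
      response_sizes[request.url] = size
  return served_from_network_bytes, served_from_cache_bytes
```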
502 run_metrics = { | 523 run_metrics = { |
503 'url': trace.url, | 524 'url': trace.url, |
(...skipping 63 matching lines...) | |
567 dependencies=[self._common_builder.original_wpr_task]) | 588 dependencies=[self._common_builder.original_wpr_task]) |
568 def BuildPatchedWpr(): | 589 def BuildPatchedWpr(): |
569 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) | 590 common_util.EnsureParentDirectoryExists(BuildPatchedWpr.path) |
570 shutil.copyfile( | 591 shutil.copyfile( |
571 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) | 592 self._common_builder.original_wpr_task.path, BuildPatchedWpr.path) |
572 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) | 593 wpr_archive = wpr_backend.WprArchiveBackend(BuildPatchedWpr.path) |
573 | 594 |
574 # Save the original response headers. | 595 # Save the original response headers. |
575 original_response_headers = {e.url: e.GetResponseHeadersDict() | 596 original_response_headers = {e.url: e.GetResponseHeadersDict() |
576 for e in wpr_archive.ListUrlEntries()} | 597 for e in wpr_archive.ListUrlEntries()} |
598 logging.info('saving original response headers for %d resources', | |
599 len(original_response_headers)) | |
600 if not original_response_headers: | |
601 # TODO(gabadie): How is it possible to not even have the main resource | |
602 # in the WPR archive? crbug.com/623966#c5 | |
pasko 2016/07/04 18:05:57: Example URL can be found in: http://crbug.com/623966
gabadie 2016/07/06 08:57:55: Done.
| |
603 raise sandwich_utils.SandwichKnownError( | |
604 'Looks like no resources were recorded in WPR during: {}'.format( | |
605 self._common_builder.original_wpr_task.name)) | |
577 with open(self._original_headers_path, 'w') as file_output: | 606 with open(self._original_headers_path, 'w') as file_output: |
578 json.dump(original_response_headers, file_output) | 607 json.dump(original_response_headers, file_output) |
579 | 608 |
580 # Patch WPR. | 609 # Patch WPR. |
581 _PatchWpr(wpr_archive) | 610 _PatchWpr(wpr_archive) |
582 wpr_archive.Persist() | 611 wpr_archive.Persist() |
583 | 612 |
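A condensed sketch of the header snapshot performed by BuildPatchedWpr, assuming a `wpr_backend.WprArchiveBackend`-like object whose `ListUrlEntries()` yields entries with `.url` and `.GetResponseHeadersDict()`; the exception type stands in for `sandwich_utils.SandwichKnownError`.

```python
import json
import logging


def SnapshotOriginalHeaders(wpr_archive, output_path, recording_task_name):
  # Map each archived URL to its original response headers before the
  # archive is patched to make every resource cacheable.
  original_response_headers = {
      entry.url: entry.GetResponseHeadersDict()
      for entry in wpr_archive.ListUrlEntries()}
  logging.info('saving original response headers for %d resources',
               len(original_response_headers))
  if not original_response_headers:
    # An empty archive means the recording step silently failed; fail
    # fast with an actionable message rather than crashing downstream.
    raise RuntimeError(  # stands in for sandwich_utils.SandwichKnownError
        'Looks like no resources were recorded in WPR during: '
        + recording_task_name)
  with open(output_path, 'w') as file_output:
    json.dump(original_response_headers, file_output)
```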
584 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) | 613 @self.RegisterTask('common/original-cache.zip', [BuildPatchedWpr]) |
585 def BuildOriginalCache(): | 614 def BuildOriginalCache(): |
586 runner = self._common_builder.CreateSandwichRunner() | 615 runner = self._common_builder.CreateSandwichRunner() |
(...skipping 111 matching lines...) | |
698 run_metrics_list = _ProcessRunOutputDir( | 727 run_metrics_list = _ProcessRunOutputDir( |
699 cache_validation_result, benchmark_setup, RunBenchmark.path) | 728 cache_validation_result, benchmark_setup, RunBenchmark.path) |
700 with open(ProcessRunOutputDir.path, 'w') as csv_file: | 729 with open(ProcessRunOutputDir.path, 'w') as csv_file: |
701 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + | 730 writer = csv.DictWriter(csv_file, fieldnames=(additional_column_names + |
702 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) | 731 sandwich_metrics.COMMON_CSV_COLUMN_NAMES)) |
703 writer.writeheader() | 732 writer.writeheader() |
704 for trace_metrics in run_metrics_list: | 733 for trace_metrics in run_metrics_list: |
705 writer.writerow(trace_metrics) | 734 writer.writerow(trace_metrics) |
706 | 735 |
707 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) | 736 self._common_builder.default_final_tasks.append(ProcessRunOutputDir) |
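The CSV export at the end of the pipeline is plain `csv.DictWriter` usage; below is a minimal sketch with hypothetical column names (the real ones come from `sandwich_metrics.COMMON_CSV_COLUMN_NAMES` plus the benchmark's additional columns).

```python
import csv


def WriteRunMetricsCsv(csv_path, run_metrics_list, fieldnames):
  # One row per trace; every dict key must appear in fieldnames, or
  # DictWriter raises ValueError.
  with open(csv_path, 'w') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    for trace_metrics in run_metrics_list:
      writer.writerow(trace_metrics)


# Hypothetical usage:
# WriteRunMetricsCsv('run-metrics.csv',
#                    [{'url': 'http://example.com', 'chromium_commit': 'abc'}],
#                    ['url', 'chromium_commit'])
```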