Chromium Code Reviews| Index: tools/android/loading/sandwich_tasks.py |
| diff --git a/tools/android/loading/sandwich_tasks.py b/tools/android/loading/sandwich_tasks.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..660231e0308589dde30f39a0664746307e60c414 |
| --- /dev/null |
| +++ b/tools/android/loading/sandwich_tasks.py |
| @@ -0,0 +1,298 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import csv |
| +import json |
| +import logging |
| +import os |
| +import re |
| +import shutil |
| + |
| +import chrome_cache |
| +import emulation |
| +import loading_trace |
| +import loading_trace_analyzer |
| +import sandwich_metrics |
| +import sandwich_misc |
| +from sandwich_runner import SandwichRunner |
| +import task_manager |
| + |
| + |
def NoRunnerModification(runner):
  """Runner-modification callback that leaves the sandwich runner untouched.

  Intended as the default value for |runner_modifier| parameters of the
  Populate*Benchmark methods below.

  Args:
    runner: A SandwichRunner; only type-checked, never modified.
  """
  assert isinstance(runner, SandwichRunner)
| + |
| + |
def EmulateNetworkModifier(network_condition):
  """Creates a runner-modification callback enabling browser-side network
  emulation.

  Args:
    network_condition: The network condition to apply to the sandwich runner;
      must be a key of emulation.NETWORK_CONDITIONS.

  Returns:
    A callback that takes a SandwichRunner and sets its network_condition.
  """
  assert network_condition in emulation.NETWORK_CONDITIONS
  def RunnerModifier(runner):
    # Fail fast on a wrong argument type, consistently with
    # NoRunnerModification.
    assert isinstance(runner, SandwichRunner)
    runner.network_condition = network_condition
  return RunnerModifier
| + |
| + |
class SandwichTaskBuilder(task_manager.Builder):
  """A builder of sandwich tasks.

  On top of task_manager.Builder, this registers the dynamic tasks that
  record/patch WPR archives, build and validate the reference cache archive,
  and run benchmarks and extract their metrics. Tasks appended to
  |default_final_tasks| are the ones executed when the caller does not select
  specific final tasks.
  """

  def __init__(self, output_directory, job_path, url_repeat):
    """Constructor.

    Args:
      output_directory: As in task_manager.Builder.__init__: directory where
        the dynamic tasks generate their output files.
      job_path: Path of the sandwich job to load into the created runners.
      url_repeat: Number of times a benchmark runner repeats its job.
    """
    task_manager.Builder.__init__(self, output_directory)
    self._job_path = job_path
    self._url_repeat = url_repeat
    # Tasks run by default; the Populate*() methods append to this list.
    self.default_final_tasks = []

  def __enter__(self):
    # Context-manager protocol kept for backward compatibility with callers
    # using `with SandwichTaskBuilder(...) as builder:`.
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    pass

  @staticmethod
  def _LoadJson(path):
    """Loads a JSON file without leaking the file descriptor."""
    with open(path) as json_file:
      return json.load(json_file)

  def _LoadSingleUrlResources(self, urls_resources_path):
    """Loads the sub-resource URL list of the job's unique URL.

    Args:
      urls_resources_path: Path of a JSON dict mapping each URL to the list
        of its sub-resources' URLs.

    Returns:
      The list of sub-resource URLs of the single URL in the file.
    """
    urls_resources = self._LoadJson(urls_resources_path)
    assert len(urls_resources) == 1, \
        "This recipe is not ready for multiple urls."
    # next(iter(...)) works on both Python 2 and 3, unlike .keys()[0].
    url = next(iter(urls_resources))
    return urls_resources[url]

  def _CreateRunner(self):
    """Creates a runner that may be used for benchmark purposes."""
    runner = self._CreateNonBenchmarkRunner()
    runner.record_video = True
    runner.job_repeat = self._url_repeat
    return runner

  def _CreateNonBenchmarkRunner(self):
    """Creates a runner for non benchmark purposes (recording, cache save)."""
    runner = SandwichRunner()
    runner.LoadJob(self._job_path)
    return runner

  def SetOriginalWprPath(self, original_wpr_path):
    """Sets the original WPR archive path's to be used.

    Args:
      original_wpr_path: Path of the original WPR archive to be used.

    Returns:
      The static task wrapping the archive.
    """
    return self.CreateStaticTask('webpages.wpr', original_wpr_path)

  def PopulateWPRRecordingTask(self):
    """Records the original WPR archive.

    Returns:
      The recording task.
    """
    @self.RegisterTask('webpages.wpr')
    def BuildOriginalWpr():
      runner = self._CreateNonBenchmarkRunner()
      runner.wpr_archive_path = BuildOriginalWpr.path
      runner.wpr_record = True
      runner.Run()

    return BuildOriginalWpr

  def PopulateCommonPipelines(self):
    """Populates the pipeline that creates the reference cache archive and
    the list of sub-resources per URL.

    Dependency tree:
      webpages.wpr
        -> webpages-patched.wpr
          -> cache-ref.zip
        -> urls-resources-run/
          -> urls-resources.json
      cache-ref-validation.log depends on cache-ref.zip and
      urls-resources.json.

    Returns:
      The last task of the pipeline.
    """
    original_wpr_task = self.tasks['webpages.wpr']

    @self.RegisterTask('webpages-patched.wpr', [original_wpr_task])
    def BuildPatchedWpr():
      shutil.copyfile(original_wpr_task.path, BuildPatchedWpr.path)
      sandwich_misc.PatchWpr(BuildPatchedWpr.path)

    @self.RegisterTask('cache-ref.zip', [BuildPatchedWpr])
    def BuildReferenceCache():
      runner = self._CreateNonBenchmarkRunner()
      runner.wpr_archive_path = BuildPatchedWpr.path
      runner.cache_archive_path = BuildReferenceCache.path
      runner.cache_operation = 'save'
      runner.Run()

    @self.RegisterTask('urls-resources-run/', [original_wpr_task])
    def UrlsResourcesRun():
      runner = self._CreateNonBenchmarkRunner()
      runner.wpr_archive_path = original_wpr_task.path
      runner.cache_operation = 'clear'
      runner.trace_output_directory = UrlsResourcesRun.path
      runner.Run()

    @self.RegisterTask('urls-resources.json', [UrlsResourcesRun])
    def ListUrlsResources():
      json_content = sandwich_misc.ListResourcesUrls(UrlsResourcesRun.path)
      with open(ListUrlsResources.path, 'w') as output:
        json.dump(json_content, output)

    @self.RegisterTask('cache-ref-validation.log',
                       [BuildReferenceCache, ListUrlsResources])
    def ValidateReferenceCache():
      json_content = self._LoadJson(ListUrlsResources.path)
      ref_urls = set()
      for urls in json_content.values():
        # set.update() accepts any iterable; no need to wrap in set().
        ref_urls.update(urls)
      sandwich_misc.ValidateCacheArchiveContent(
          ref_urls, BuildReferenceCache.path)

    self.default_final_tasks.append(ValidateReferenceCache)
    return ValidateReferenceCache

  def PopulateBenchmarkPipeline(self, setup_task, runner_modifier,
                                benchmark_name):
    """Populates a benchmark's pipeline from its setup task.

    Args:
      setup_task: The benchmark's setup task. Its name must end with
        '-setup.json' and its JSON content must hold the 'cache_whitelist'
        and 'url_resources' keys.
      runner_modifier: A callback to modify the sandwich runner.
      benchmark_name: The benchmark's name for that runner modifier.

    Returns:
      The last task of the pipeline.
    """
    assert setup_task.name.endswith('-setup.json'), \
        'Task \'{}\' is not a benchmark setup.'.format(setup_task.name)
    benchmark_family_name = setup_task.name[:-len('-setup.json')]
    patched_wpr_task = self.tasks['webpages-patched.wpr']
    reference_cache_task = self.tasks['cache-ref.zip']

    @self.RegisterTask(benchmark_family_name + '-cache.zip',
                       dependencies=[setup_task, reference_cache_task],
                       merge=True)
    def BuildBenchmarkCacheArchive():
      setup = self._LoadJson(setup_task.path)
      chrome_cache.ApplyUrlWhitelistToCacheArchive(
          cache_archive_path=reference_cache_task.path,
          whitelisted_urls=setup['cache_whitelist'],
          output_cache_archive_path=BuildBenchmarkCacheArchive.path)

    @self.RegisterTask(benchmark_name + '-run/',
                       dependencies=[BuildBenchmarkCacheArchive])
    def RunBenchmark():
      runner = self._CreateRunner()
      runner_modifier(runner)
      runner.wpr_archive_path = patched_wpr_task.path
      runner.wpr_out_log_path = os.path.join(RunBenchmark.path, 'wpr.log')
      runner.cache_archive_path = BuildBenchmarkCacheArchive.path
      runner.cache_operation = 'push'
      runner.trace_output_directory = RunBenchmark.path
      runner.Run()

    @self.RegisterTask(benchmark_name + '-metrics.csv',
                       dependencies=[RunBenchmark])
    def ExtractMetrics():
      sandwich_misc.VerifyBenchmarkOutputDirectory(
          setup_task.path, RunBenchmark.path)
      trace_metrics_list = sandwich_metrics.PullMetricsFromOutputDirectory(
          RunBenchmark.path)
      # Sort rows by trace id so the CSV output is deterministic.
      trace_metrics_list.sort(key=lambda e: e['id'])
      with open(ExtractMetrics.path, 'w') as csv_file:
        writer = csv.DictWriter(csv_file,
                                fieldnames=sandwich_metrics.CSV_FIELD_NAMES)
        writer.writeheader()
        for trace_metrics in trace_metrics_list:
          writer.writerow(trace_metrics)

    self.default_final_tasks.append(ExtractMetrics)
    return ExtractMetrics

  def PopulateFullCacheLoadBenchmark(self, benchmark_name='fullcache',
                                     runner_modifier=NoRunnerModification):
    """Populates the full cache load benchmark's pipeline.

    Args:
      benchmark_name: Name prefixing the benchmark's generated task files.
      runner_modifier: A callback to modify the sandwich runner.

    Returns:
      The last task of the pipeline.
    """
    urls_resources_task = self.tasks['urls-resources.json']

    @self.RegisterTask('fullcache-setup.json',
                       dependencies=[urls_resources_task],
                       merge=True)
    def SetupBenchmark():
      url_resources = self._LoadSingleUrlResources(urls_resources_task.path)
      with open(SetupBenchmark.path, 'w') as output:
        # Full cache: every sub-resource may stay in the cache.
        json.dump({
            'cache_whitelist': url_resources,
            'url_resources': url_resources,
          }, output)

    return self.PopulateBenchmarkPipeline(
        SetupBenchmark, runner_modifier, benchmark_name)

  def PopulateClearCacheLoadBenchmark(self, benchmark_name='clearcache',
                                      runner_modifier=NoRunnerModification):
    """Populates the clear cache load benchmark's pipeline.

    Args:
      benchmark_name: Name prefixing the benchmark's generated task files.
      runner_modifier: A callback to modify the sandwich runner.

    Returns:
      The last task of the pipeline.
    """
    urls_resources_task = self.tasks['urls-resources.json']

    @self.RegisterTask('clearcache-setup.json',
                       dependencies=[urls_resources_task],
                       merge=True)
    def SetupBenchmark():
      url_resources = self._LoadSingleUrlResources(urls_resources_task.path)
      with open(SetupBenchmark.path, 'w') as output:
        # Clear cache: an empty whitelist evicts everything from the cache.
        json.dump({
            'cache_whitelist': [],
            'url_resources': url_resources,
          }, output)

    return self.PopulateBenchmarkPipeline(
        SetupBenchmark, runner_modifier, benchmark_name)

  def PopulateNoStatePrefetchLoadBenchmark(self, benchmark_name='prefetch',
                                           runner_modifier=NoRunnerModification):
    """Populates the NoState-Prefetch load benchmark's pipeline.

    Args:
      benchmark_name: Name prefixing the benchmark's generated task files.
      runner_modifier: A callback to modify the sandwich runner.

    Returns:
      The last task of the pipeline.
    """
    # TODO(gabadie): make it generic for the different sub-resource discoverer.
    urls_resources_run_task = self.tasks['urls-resources-run/']
    urls_resources_task = self.tasks['urls-resources.json']

    @self.RegisterTask('prefetch-setup.json',
                       dependencies=[urls_resources_task],
                       merge=True)
    def SetupBenchmark():
      # NOTE(review): urls_resources_run_task's output is read here but only
      # depended upon transitively through urls-resources.json — confirm the
      # task manager guarantees it is up to date.
      trace_path = os.path.join(urls_resources_run_task.path, '0/trace.json')
      whitelisted_urls = sandwich_misc.ExtractDiscoverableUrls(
          trace_path, sandwich_misc.HTML_PRELOAD_SCANNER_DISCOVERER)

      url_resources = self._LoadSingleUrlResources(urls_resources_task.path)
      with open(SetupBenchmark.path, 'w') as output:
        json.dump({
            'cache_whitelist': list(whitelisted_urls),
            'url_resources': url_resources,
          }, output)

    return self.PopulateBenchmarkPipeline(
        SetupBenchmark, runner_modifier, benchmark_name)