build/android/pylib/local/device/local_device_perf_test_run.py - Issue 2012323002: [Android] Implement perf tests to platform mode.

Side by Side Diff: build/android/pylib/local/device/local_device_perf_test_run.py

Issue 2012323002: [Android] Implement perf tests to platform mode. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: bug fixes and shuffling Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Copyright 2015 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import io

	6 import json

	7 import logging

	8 import os

	9 import pickle

	10 import re

	11 import shutil

	12 import sys

	13 import tempfile

	14 import threading

	15 import time

	16 import zipfile

	17

	18 from devil.android import battery_utils

	19 from devil.android import device_errors

	20 from devil.android import device_list

	21 from devil.android import device_utils

	22 from devil.android import forwarder

	23 from devil.utils import cmd_helper

	24 from devil.utils import reraiser_thread

	25 from devil.utils import watchdog_timer

	26 from pylib import constants

	27 from pylib.base import base_test_result

	28 from pylib.constants import host_paths

	29 from pylib.local.device import local_device_test_run

	30

	31

# Regex for the master branch commit position. Matches a complete
# 'Cr-Commit-Position: refs/heads/master@{#N}' line and captures N (the
# digits) as group 1.
_GIT_CR_POS_RE = re.compile(r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$')

	34

	35

	36 class _HeartBeatLogger(object):

	37 # How often to print the heartbeat on flush().

	38 _PRINT_INTERVAL = 30.0

	39

	40 def __init__(self):

	41 """A file-like class for keeping the buildbot alive."""
	jbudorick 2016/06/01 20:46:06 What calls are taking so long that this is necessa What calls are taking so long that this is necessary? timeout_retry.py does something like this already, so perhaps this isn't necessary any more? https://code.google.com/p/chromium/codesearch#chromium/src/third_party/catapu... rnephew (Reviews Here) 2016/06/01 21:01:39 This was part of the old test runner, it might not Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > What calls are taking so long that this is necessary? > > timeout_retry.py does something like this already, so perhaps this isn't > necessary any more? > https://code.google.com/p/chromium/codesearch#chromium/src/third_party/catapu... This was part of the old test runner, it might not be required. I'll take it out and see how it reacts. My guess is that the page cycler tests were causing problems. rnephew (Reviews Here) 2016/06/01 22:14:15 Done. Show quoted text On 2016/06/01 21:01:39, rnephew (Reviews Here) wrote: > On 2016/06/01 20:46:06, jbudorick wrote: > > What calls are taking so long that this is necessary? > > > > timeout_retry.py does something like this already, so perhaps this isn't > > necessary any more? > > > https://code.google.com/p/chromium/codesearch#chromium/src/third_party/catapu... > > This was part of the old test runner, it might not be required. I'll take it out > and see how it reacts. My guess is that the page cycler tests were causing > problems. Done.
	42 self._len = 0

	43 self._tick = time.time()

	44 self._stopped = threading.Event()

	45 self._timer = threading.Thread(target=self._runner)

	46 self._timer.start()

	47

	48 def _runner(self):

	49 while not self._stopped.is_set():

	50 self.flush()

	51 self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)

	52

	53 def write(self, data):

	54 self._len += len(data)

	55

	56 def flush(self):

	57 now = time.time()

	58 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:

	59 self._tick = now

	60 print '--single-step output length %d' % self._len

	61 sys.stdout.flush()

	62

	63 def stop(self):

	64 self._stopped.set()

	65

	66

	67 def _GetChromiumRevision():

	68 # pylint: disable=line-too-long

	69 """Get the git hash and commit position of the chromium master branch.

	70

	71 See: https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/s lave/runtest.py#212

	72

	73 Returns:

	74 A dictionary with 'revision' and 'commit_pos' keys.

	75 """

	76 # pylint: enable=line-too-long

	77 status, output = cmd_helper.GetCmdStatusAndOutput(

	78 ['git', 'log', '-n', '1', '--pretty=format:%H%n%B', 'HEAD'],

	79 host_paths.DIR_SOURCE_ROOT)

	80 revision = None

	81 commit_pos = None

	82 if not status:

	83 lines = output.splitlines()

	84 revision = lines[0]

	85 for line in reversed(lines):

	86 m = _GIT_CR_POS_RE.match(line.strip())

	87 if m:

	88 commit_pos = int(m.group(1))

	89 break

	90 return {'revision': revision, 'commit_pos': commit_pos}

	91

	92

	93 class TestShard(object):

	94 def __init__(self, test_instance, device, index, tests, results, watcher=None,

	95 retries=3):

	96 logging.info('Create shard %s for device %s to run the following tests:',

	97 index, device)

	98 for t in tests:

	99 logging.info(' %s', t)

	100 self._battery = battery_utils.BatteryUtils(device)

	101 self._device = device

	102 self._index = index

	103 self._tests = tests

	104 self._watcher = watcher

	105 self._test_instance = test_instance

	106 self._output_dir = None

	107 self._results = results

	108 self._retries = retries

	109

	110 def _WriteBuildBotJson(self):
	jbudorick 2016/06/01 20:46:06 I don't think this should be part of the default o I don't think this should be part of the default operating mode. rnephew (Reviews Here) 2016/06/01 21:01:39 It currently is, and for this CL I would like runt Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > I don't think this should be part of the default operating mode. It currently is, and for this CL I would like runtime behavior to match old behavior as close as possible. I will add a flag to this though, that defaults to True, to write the buildbot json. Then we can make changes to the recipes to enable it then I can default it to not write the buildbot json. jbudorick 2016/06/01 21:03:20 to reiterate: maintaining old crufty behavior is a Show quoted text On 2016/06/01 21:01:39, rnephew (Reviews Here) wrote: > On 2016/06/01 20:46:06, jbudorick wrote: > > I don't think this should be part of the default operating mode. > > It currently is, and for this CL I would like runtime behavior to match old > behavior as close as possible. I will add a flag to this though, that defaults > to True, to write the buildbot json. Then we can make changes to the recipes to > enable it then I can default it to not write the buildbot json. to reiterate: maintaining old crufty behavior is a non-goal of platform mode. It's going to be behind a flag to begin with anyway. rnephew (Reviews Here) 2016/06/01 21:12:13 By my last comment I just meant I wanted the flag Show quoted text On 2016/06/01 21:03:20, jbudorick wrote: > On 2016/06/01 21:01:39, rnephew (Reviews Here) wrote: > > On 2016/06/01 20:46:06, jbudorick wrote: > > > I don't think this should be part of the default operating mode. > > > > It currently is, and for this CL I would like runtime behavior to match old > > behavior as close as possible. I will add a flag to this though, that defaults > > to True, to write the buildbot json. 
Then we can make changes to the recipes > to > > enable it then I can default it to not write the buildbot json. > > to reiterate: maintaining old crufty behavior is a non-goal of platform mode. > It's going to be behind a flag to begin with anyway. By my last comment I just meant I wanted the flag to be on by default at first. Make a flag for the output. Land this. Make changes to recipes to add the flag. Default the flag to off. That way we do not cause any problems on the bots. jbudorick 2016/06/01 21:16:09 and I meant that this entire CL should be behind - Show quoted text On 2016/06/01 21:12:13, rnephew (Reviews Here) wrote: > On 2016/06/01 21:03:20, jbudorick wrote: > > On 2016/06/01 21:01:39, rnephew (Reviews Here) wrote: > > > On 2016/06/01 20:46:06, jbudorick wrote: > > > > I don't think this should be part of the default operating mode. > > > > > > It currently is, and for this CL I would like runtime behavior to match old > > > behavior as close as possible. I will add a flag to this though, that > defaults > > > to True, to write the buildbot json. Then we can make changes to the recipes > > to > > > enable it then I can default it to not write the buildbot json. > > > > to reiterate: maintaining old crufty behavior is a non-goal of platform mode. > > It's going to be behind a flag to begin with anyway. > > By my last comment I just meant I wanted the flag to be on by default at first. > Make a flag for the output. > Land this. > Make changes to recipes to add the flag. > Default the flag to off. > > That way we do not cause any problems on the bots. and I meant that this entire CL should be behind --enable-platform-mode, which will also not cause bot problems. rnephew (Reviews Here) 2016/06/01 21:23:16 Oh, yeah. 
I planned to back out the changes in tes Show quoted text On 2016/06/01 21:16:09, jbudorick wrote: > On 2016/06/01 21:12:13, rnephew (Reviews Here) wrote: > > On 2016/06/01 21:03:20, jbudorick wrote: > > > On 2016/06/01 21:01:39, rnephew (Reviews Here) wrote: > > > > On 2016/06/01 20:46:06, jbudorick wrote: > > > > > I don't think this should be part of the default operating mode. > > > > > > > > It currently is, and for this CL I would like runtime behavior to match > old > > > > behavior as close as possible. I will add a flag to this though, that > > defaults > > > > to True, to write the buildbot json. Then we can make changes to the > recipes > > > to > > > > enable it then I can default it to not write the buildbot json. > > > > > > to reiterate: maintaining old crufty behavior is a non-goal of platform > mode. > > > It's going to be behind a flag to begin with anyway. > > > > By my last comment I just meant I wanted the flag to be on by default at > first. > > Make a flag for the output. > > Land this. > > Make changes to recipes to add the flag. > > Default the flag to off. > > > > That way we do not cause any problems on the bots. > > and I meant that this entire CL should be behind --enable-platform-mode, which > will also not cause bot problems. Oh, yeah. I planned to back out the changes in test_runner.py that make it go live before landing. I can land the flag in this cl and have it be a no-op until platform mode is enabled.
	111 """Write metadata about the buildbot environment to the output dir."""

	112 if not self._output_dir:

	113 return

	114 data = {

	115 'chromium': _GetChromiumRevision(),

	116 'environment': dict(os.environ)

	117 }

	118 with open(os.path.join(self._output_dir, 'buildbot.json'), 'w') as f:

	119 json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))

	120

	121 def _TestSetUp(self):

	122 self._ResetWatcher()

	123 try:

	124 logging.info('Unmapping device ports.')
	jbudorick 2016/06/01 20:46:06 We should be doing unmapping at TearDown time, not We should be doing unmapping at TearDown time, not SetUp time. rnephew (Reviews Here) 2016/06/01 22:14:15 Done. Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > We should be doing unmapping at TearDown time, not SetUp time. Done.
	125 forwarder.Forwarder.UnmapAllDevicePorts(self._device)

	126 self._device.RestartAdbd()
	jbudorick 2016/06/01 20:46:06 I'm wondering if this is still necessary. I'm wondering if this is still necessary. rnephew (Reviews Here) 2016/06/01 21:01:39 I'll experiment locally. Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > I'm wondering if this is still necessary. I'll experiment locally. rnephew (Reviews Here) 2016/06/02 18:43:26 Local experimentation shows no detriment by gettin Show quoted text On 2016/06/01 21:01:39, rnephew (Reviews Here) wrote: > On 2016/06/01 20:46:06, jbudorick wrote: > > I'm wondering if this is still necessary. > > I'll experiment locally. Local experimentation shows no detriment by getting rid of it. Moved it to run only when a test fails.
	127 except Exception: # pylint: disable=broad-except

	128 logging.exception('Exception when resetting ports.')

	129

	130 self._BatteryLevelCheck()

	131 self._BatteryTempCheck()

	132 self._ScreenCheck()

	133

	134 if not self._device.IsOnline():

	135 msg = 'Device %s is unresponsive.' % str(self._device)

	136 logging.warning(msg)
	jbudorick 2016/06/01 20:46:06 This should not log if you're already raising an e This should not log if you're already raising an exception. It should instead be logged where the exception is caught. rnephew (Reviews Here) 2016/06/01 22:14:15 Done. Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > This should not log if you're already raising an exception. It should instead be > logged where the exception is caught. Done.
	137 raise device_errors.DeviceUnreachableError(msg)

	138

	139 def _CleanupOutputDirectory(self):

	140 if self._output_dir:

	141 shutil.rmtree(self._output_dir, ignore_errors=True)

	142 self._output_dir = None

	143

	144 def _CreateCmd(self, test):

	145 cmd = '%s --device %s' % (self._tests[test]['cmd'], str(self._device))

	146 if (self._test_instance.collect_chartjson_data

	147 or self._tests[test].get('archive_output_dir')):

	148 self._output_dir = tempfile.mkdtemp()

	149 cmd = cmd + ' --output-dir=%s' % self._output_dir

	150 if self._test_instance.dry_run:

	151 cmd = 'echo %s' % cmd

	152 return cmd

	153

	154 def _RunSingleTest(self, test):

	155

	156 logging.info('Running %s on shard %s', test, self._index)

	157 timeout = (

	158 None if self._test_instance.no_timeout

	159 else self._tests[test].get('timeout', 3600))

	160 logging.info('Timeout for %s test: %s', test, timeout)

	161

	162 logfile = sys.stdout

	163 if self._test_instance.single_step:

	164 logfile = _HeartBeatLogger()

	165 cmd = self._CreateCmd(test)

	166 self._WriteBuildBotJson()

	167 cwd = os.path.abspath(host_paths.DIR_SOURCE_ROOT)

	168 if cmd.startswith('src/'):

	169 cwd = os.path.abspath(os.path.join(host_paths.DIR_SOURCE_ROOT, os.pardir))

	170

	171 try:

	172 logging.debug('Running test with command \'%s\'', cmd)

	173 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(

	174 cmd, timeout, cwd=cwd, shell=True, logfile=logfile)

	175 json_output = self._test_instance.ReadChartjsonOutput(self._output_dir)

	176 except cmd_helper.TimeoutError as e:

	177 exit_code = -1

	178 output = e.output

	179 json_output = ''

	180 finally:

	181 if self._test_instance.single_step:

	182 logfile.stop()

	183 return cmd, exit_code, output, json_output

	184

	185 def _ProcessTestResult(

	186 self, test, cmd, start_time, end_time, exit_code, output, json_output):

	187 if exit_code is None:

	188 exit_code = -1

	189 logging.info('%s : exit_code=%d in %d secs on device %s',

	190 test, exit_code, end_time - start_time,

	191 str(self._device))

	192 if exit_code == 0:

	193 result_type = base_test_result.ResultType.PASS

	194 else:

	195 result_type = base_test_result.ResultType.FAIL

	196 # TODO(rnephew): Improve device recovery logic.

	197 try:

	198 self._device.WaitUntilFullyBooted(timeout=120)

	199 except device_errors.CommandTimeoutError:

	200 logging.exception('Device failed to return after %s.', test)

	201 actual_exit_code = exit_code

	202 if (self._test_instance.flaky_steps

	203 and test in self._test_instance.flaky_steps):

	204 exit_code = 0

	205 archive_bytes = (self._ArchiveOutputDir()

	206 if self._tests[test].get('archive_output_dir')

	207 else None)

	208 persisted_result = {

	209 'name': test,

	210 'output': [output],

	211 'chartjson': json_output,

	212 'archive_bytes': archive_bytes,

	213 'exit_code': exit_code,

	214 'actual_exit_code': actual_exit_code,

	215 'result_type': result_type,

	216 'start_time': start_time,

	217 'end_time': end_time,

	218 'total_time': end_time - start_time,

	219 'device': str(self._device),

	220 'cmd': cmd,

	221 }

	222 self._SaveResult(persisted_result)

	223 return result_type

	224

	225 def RunTestsOnShard(self):

	226 for test in self._tests:

	227 self._TestSetUp()

	228

	229 try:

	230 exit_code = None

	231 tries_left = self._retries

	232

	233 while exit_code != 0 and tries_left > 0:

	234 self._ResetWatcher()

	235 tries_left = tries_left - 1

	236 start_time = time.time()

	237 cmd, exit_code, output, json_output = self._RunSingleTest(test)

	238 end_time = time.time()

	239 result_type = self._ProcessTestResult(

	240 test, cmd, start_time, end_time, exit_code, output, json_output)

	241

	242 result = base_test_result.TestRunResults()

	243 result.AddResult(base_test_result.BaseTestResult(test, result_type))

	244 self._results.append(result)

	245 finally:

	246 self._CleanupOutputDirectory()

	247

	248 @staticmethod

	249 def _SaveResult(result):

	250 pickled = os.path.join(constants.PERF_OUTPUT_DIR, result['name'])

	251 if os.path.exists(pickled):

	252 with file(pickled, 'r') as f:

	253 previous = pickle.loads(f.read())

	254 result['output'] = previous['output'] + result['output']

	255 with file(pickled, 'w') as f:

	256 f.write(pickle.dumps(result))

	257

	258 def _ArchiveOutputDir(self):

	259 """Archive all files in the output dir, and return as compressed bytes."""

	260 with io.BytesIO() as archive:

	261 with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as contents:

	262 num_files = 0

	263 for absdir, _, files in os.walk(self._output_dir):

	264 reldir = os.path.relpath(absdir, self._output_dir)

	265 for filename in files:

	266 src_path = os.path.join(absdir, filename)

	267 # We use normpath to turn './file.txt' into just 'file.txt'.

	268 dst_path = os.path.normpath(os.path.join(reldir, filename))

	269 contents.write(src_path, dst_path)

	270 num_files += 1

	271 if num_files:

	272 logging.info('%d files in the output dir were archived.', num_files)

	273 else:

	274 logging.warning('No files in the output dir. Archive is empty.')

	275 return archive.getvalue()

	276

	277 def _ResetWatcher(self):

	278 if self._watcher:

	279 self._watcher.Reset()

	280

	281 def _BatteryLevelCheck(self):

	282 logging.info('Charge level: %s%%',

	283 str(self._battery.GetBatteryInfo().get('level')))

	284 if self._test_instance.min_battery_level:

	285 self._battery.ChargeDeviceToLevel(self._test_instance.min_battery_level)

	286

	287 def _ScreenCheck(self):

	288 if not self._device.IsScreenOn():

	289 self._device.SetScreen(True)

	290

	291 def _BatteryTempCheck(self):

	292 logging.info('temperature: %s (0.1 C)',

	293 str(self._battery.GetBatteryInfo().get('temperature')))

	294 if self._test_instance.max_battery_temp:

	295 self._battery.LetBatteryCoolToTemperature(

	296 self._test_instance.max_battery_temp)

	297

	298

	299 class LocalDevicePerfTestRun(local_device_test_run.LocalDeviceTestRun):

	300 def __init__(self, env, test_instance):

	301 super(LocalDevicePerfTestRun, self).__init__(env, test_instance)

	302 self._test_instance = test_instance

	303 self._env = env

	304 self._timeout = 10 * 60 # Ten minutes

	305 self._devices = None

	306 self._test_buckets = []

	307 self._watcher = None

	308

	309 def SetUp(self):

	310 self._devices = self._GetAllDevices(self._env.devices,

	311 self._test_instance.known_devices_file)

	312 self._watcher = watchdog_timer.WatchdogTimer(self._timeout)

	313

	314 def TearDown(self):

	315 pass

	316

	317 def _GetStepsFromDict(self):

	318 if self._test_instance.single_step:

	319 return {

	320 'version': 1,

	321 'steps': {

	322 'single_step': {

	323 'device_affinity': 0,

	324 'cmd': self._test_instance.single_step

	325 },

	326 }

	327 }

	328 if self._test_instance.steps:

	329 with file(self._test_instance.steps, 'r') as f:

	330 steps = json.load(f)

	331 assert steps['version'] == 1

	332 return steps

	333

	334 def _SplitTestsByAffinity(self):

	335 test_dict = self._GetStepsFromDict()

	336 for test in test_dict['steps']:

	337 affinity = test_dict['steps'][test]['device_affinity']

	338 if len(self._test_buckets) < affinity + 1:

	339 while len(self._test_buckets) != affinity + 1:

	340 self._test_buckets.append({})

	341 self._test_buckets[affinity][test] = test_dict['steps'][test]

	342 return self._test_buckets

	343

	344 @staticmethod

	345 def _GetAllDevices(active_devices, devices_path):

	346 if not devices_path:
	jbudorick 2016/06/01 20:46:06 What is going on here with the double devices_path What is going on here with the double devices_path check? rnephew (Reviews Here) 2016/06/01 22:14:15 Done. Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > What is going on here with the double devices_path check? Done.
	347 logging.warning('Known devices file path not being passed. For device '

	348 'affinity to work properly, it must be passed.')

	349 try:

	350 if devices_path:

	351 devices = [device_utils.DeviceUtils(s)

	352 for s in device_list.GetPersistentDeviceList(devices_path)]

	353 else:

	354 logging.warning('Known devices file path not being passed. For device '

	355 'affinity to work properly, it must be passed.')

	356 devices = active_devices

	357 except IOError as e:

	358 logging.error('Unable to find %s [%s]', devices_path, e)

	359 devices = active_devices

	360 return sorted(devices)

	361

	362

	363 def RunTests(self):

	364 # Option selected for saving a json file with a list of test names.

	365 if self._test_instance.output_json_list:

	366 return self._test_instance.OutputJsonList()

	367

	368 # Just print the results from a single previously executed step.

	369 if self._test_instance.print_step:

	370 return self._test_instance.PrintTestOutput()

	371

	372 # Affinitize the tests.

	373 test_buckets = self._SplitTestsByAffinity()

	374 if not test_buckets:

	375 raise NotImplementedError('No tests found!')
	jbudorick 2016/06/01 20:46:06 NotImplementedError is the wrong exception type he NotImplementedError is the wrong exception type here. rnephew (Reviews Here) 2016/06/01 22:14:15 Done. Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > NotImplementedError is the wrong exception type here. Done.
	376

	377 threads = []

	378 results = []

	379 for x in xrange(min(len(self._devices), len(test_buckets))):

	380 new_shard = TestShard(self._test_instance, self._devices[x], x,

	381 test_buckets[x], results, watcher=self._watcher)

	382 threads.append(reraiser_thread.ReraiserThread(new_shard.RunTestsOnShard))

	383

	384 workers = reraiser_thread.ReraiserThreadGroup(threads)

	385 workers.StartAll()

	386

	387 try:

	388 workers.JoinAll(self._watcher)

	389 except device_errors.CommandFailedError:

	390 logging.exception('Command failed on device.')

	391 except device_errors.CommandTimeoutError:

	392 logging.exception('Command timed out on device.')

	393 except device_errors.DeviceUnreachableError:

	394 logging.exception('Device became unreachable.')

	395 return results

	396

	397 # override

	398 def TestPackage(self):

	399 return 'Perf'

	400

	401 # override

	402 def _CreateShards(self, _tests):

	403 raise NotImplementedError

	404

	405 # override

	406 def _GetTests(self):

	407 return self._test_buckets

	408

	409 # override

	410 def _RunTest(self, _device, _test):

	411 raise NotImplementedError

	412

	413 # override

	414 def _ShouldShard(self):

	415 return False

OLD	NEW