build/android/pylib/local/device/local_device_perf_test_run.py - Issue 2012323002: [Android] Implement perf tests to platform mode.

Side by Side Diff: build/android/pylib/local/device/local_device_perf_test_run.py

Issue 2012323002: [Android] Implement perf tests to platform mode. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: add retry logic and some clean up Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« build/android/pylib/base/test_run_factory.py ('K') | « build/android/pylib/base/test_run_factory.py ('k') | build/android/pylib/perf/perf_test_instance.py » ('j') | build/android/pylib/perf/perf_test_instance.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.
	mikecase (-- gone --) 2016/06/01 17:40:27: nit: 2016. rnephew (Reviews Here) 2016/06/01 20:32:04: Done.
	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import io

	6 import json

	7 import logging

	8 import os

	9 import pickle

	10 import re

	11 import shutil

	12 import sys

	13 import tempfile

	14 import threading

	15 import time

	16 import zipfile

	17

	18 from devil.android import battery_utils

	19 from devil.android import device_errors

	20 from devil.android import device_list

	21 from devil.android import device_utils

	22 from devil.android import forwarder

	23 from devil.utils import cmd_helper

	24 from devil.utils import reraiser_thread

	25 from devil.utils import watchdog_timer

	26 from pylib import constants

	27 from pylib.base import base_test_result

	28 from pylib.constants import host_paths

	29 from pylib.local.device import local_device_test_run

	30

	31

# Regex for the master branch commit position. Matches a whole
# "Cr-Commit-Position" footer line; group 1 captures the numeric position.
_GIT_CR_POS_RE = re.compile(r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$')

	34

	35

	36 class _HeartBeatLogger(object):

	37 # How often to print the heartbeat on flush().
	mikecase (-- gone --) 2016/06/01 17:40:28 nit: add "in <time_unit>" (Im guessing it is secon nit: add "in <time_unit>" (Im guessing it is seconds) to the end of comment. rnephew (Reviews Here) 2016/06/01 20:32:04 Done. Show quoted text On 2016/06/01 17:40:28, mikecase wrote: > nit: add "in <time_unit>" (Im guessing it is seconds) to the end of comment. Done.
	38 _PRINT_INTERVAL = 30.0

	39

	40 def __init__(self):

	41 """A file-like class for keeping the buildbot alive."""

	42 self._len = 0

	43 self._tick = time.time()

	44 self._stopped = threading.Event()

	45 self._timer = threading.Thread(target=self._runner)

	46 self._timer.start()

	47

	48 def _runner(self):

	49 while not self._stopped.is_set():

	50 self.flush()

	51 self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)

	52

	53 def write(self, data):

	54 self._len += len(data)

	55

	56 def flush(self):

	57 now = time.time()

	58 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:

	59 self._tick = now

	60 print '--single-step output length %d' % self._len
	mikecase (-- gone --) 2016/06/01 17:40:28 Since you are doing this on a separate thread I wo Since you are doing this on a separate thread I would probably use the logging module which is thread-safe. rnephew (Reviews Here) 2016/06/01 20:32:04 Done. Show quoted text On 2016/06/01 17:40:28, mikecase wrote: > Since you are doing this on a separate thread I would probably use the logging > module which is thread-safe. Done.
	61 sys.stdout.flush()

	62

	63 def stop(self):

	64 self._stopped.set()

	65

	66

	67 def _GetChromiumRevision():

	68 # pylint: disable=line-too-long

	69 """Get the git hash and commit position of the chromium master branch.

	70

	71 See: https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/s lave/runtest.py#212
	mikecase (-- gone --) 2016/06/01 17:40:28 This comment is probably going to get out of date This comment is probably going to get out of date since the line number could change. You "could" point towards a specific revision instead of master, but I think that is also unwise. Idk, I would probably just remove think link.
	72

	73 Returns:

	74 A dictionary with 'revision' and 'commit_pos' keys.

	75 """

	76 # pylint: enable=line-too-long

	77 status, output = cmd_helper.GetCmdStatusAndOutput(
	mikecase (-- gone --) 2016/06/01 17:40:28 you will have to set cwd=CHROMIUM_SRC_DIR or somet you will have to set cwd=CHROMIUM_SRC_DIR or something for this. Otherwise, if someone runs the test_runner from a different directory, they might not even be in a git repo rnephew (Reviews Here) 2016/06/01 20:32:04 It already does, the host_paths.DIR_SOURCE_ROOT pa Show quoted text On 2016/06/01 17:40:28, mikecase wrote: > you will have to set cwd=CHROMIUM_SRC_DIR or something for this. Otherwise, if > someone runs the test_runner from a different directory, they might not even be > in a git repo It already does, the host_paths.DIR_SOURCE_ROOT part is cwd=. I explicitly state it now though, since it is an optional thing.
	78 ['git', 'log', '-n', '1', '--pretty=format:%H%n%B', 'HEAD'],

	79 host_paths.DIR_SOURCE_ROOT)

	80 revision = None

	81 commit_pos = None

	82 if not status:

	83 lines = output.splitlines()

	84 revision = lines[0]

	85 for line in reversed(lines):

	86 m = _GIT_CR_POS_RE.match(line.strip())

	87 if m:

	88 commit_pos = int(m.group(1))

	89 break

	90 return {'revision': revision, 'commit_pos': commit_pos}

	91

	92

	93 class TestShard(object):

	94 def __init__(self, test_instance, device, index, tests, results, watcher=None,

	95 retries=3):

	96 logging.info('Create shard %s for device %s to run the following tests:',

	97 index, device)

	98 for t in tests:

	99 logging.info(' %s', t)

	100 self._battery = battery_utils.BatteryUtils(device)

	101 self._device = device

	102 self._index = index

	103 self._tests = tests

	104 self._watcher = watcher

	105 self._test_instance = test_instance

	106 self._output_dir = None

	107 self._results = results

	108 self._retries = retries
	mikecase (-- gone --) 2016/06/01 17:40:28: nit: probably just alphabetize all of the things OR put them in the order they are listed out as args. I think either of those makes sense. rnephew (Reviews Here) 2016/06/01 20:32:05: Done.
	109

	110 def _WriteBuildBotJson(self):

	111 """Write metadata about the buildbot environment to the output dir."""

	112 if not self._output_dir:

	113 return

	114 data = {

	115 'chromium': _GetChromiumRevision(),

	116 'environment': dict(os.environ)

	117 }

	118 with open(os.path.join(self._output_dir, 'buildbot.json'), 'w') as f:

	119 json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))

	120

	121 def _TestSetUp(self):

	122 self._ResetWatcher()

	123 try:

	124 logging.info('Unmapping device ports.')

	125 forwarder.Forwarder.UnmapAllDevicePorts(self._device)

	126 self._device.RestartAdbd()

	127 except Exception: # pylint: disable=broad-except

	128 logging.exception('Exception when resetting ports.')
	mikecase (-- gone --) 2016/06/01 17:40:27 I would say this is fine except you also have Rest I would say this is fine except you also have RestartAdbd() in the try block. This could output a very misleading error message if that is the things that raises the exception. rnephew (Reviews Here) 2016/06/01 20:32:04 Done. Show quoted text On 2016/06/01 17:40:27, mikecase wrote: > I would say this is fine except you also have RestartAdbd() in the try block. > This could output a very misleading error message if that is the things that > raises the exception. Done.
	129

	130 self._BatteryLevelCheck()

	131 self._BatteryTempCheck()

	132 self._ScreenCheck()

	133

	134 if not self._device.IsOnline():

	135 msg = 'Device %s is unresponsive.' % str(self._device)

	136 logging.warning(msg)

	137 raise device_errors.DeviceUnreachableError(msg)

	138 self._ResetWatcher()

	139

	140 def _CleanupOutputDirectory(self):

	141 if self._output_dir:

	142 shutil.rmtree(self._output_dir, ignore_errors=True)

	143 self._output_dir = None

	144

	145 def _CreateCmd(self, test):

	146 cmd = '%s --device %s' % (self._tests[test]['cmd'], str(self._device))

	147 if (self._test_instance.collect_chartjson_data

	148 or self._tests[test].get('archive_output_dir')):

	149 self._output_dir = tempfile.mkdtemp()

	150 cmd = cmd + ' --output-dir=%s' % self._output_dir

	151 if self._test_instance.dry_run:

	152 cmd = 'echo %s' % cmd

	153 return cmd

	154

	155 def _ReadChartjsonOutput(self):

	156 if not self._output_dir:

	157 return ''

	158 json_output_path = os.path.join(self._output_dir, 'results-chart.json')

	159 try:

	160 with open(json_output_path) as f:

	161 return f.read()

	162 except IOError:

	163 logging.exception('Exception when reading chartjson.')

	164 logging.error('This usually means that telemetry did not run, so it could'

	165 ' not generate the file. Please check the device running'

	166 ' the test.')

	167 return ''

	168

	169 def _RunSingleTest(self, test):

	170

	171 logging.info('Running %s on shard %s', test, self._index)

	172 timeout = (

	173 None if self._test_instance.no_timeout

	174 else self._tests[test].get('timeout', 3600))
	mikecase (-- gone --) 2016/06/01 17:40:27 probably factor out this 3600 somewhere and make i probably factor out this 3600 somewhere and make it a _CONSTANT rnephew (Reviews Here) 2016/06/01 20:32:05 Switched it to use the class varialbe _timeout whi Show quoted text On 2016/06/01 17:40:27, mikecase wrote: > probably factor out this 3600 somewhere and make it a _CONSTANT Switched it to use the class varialbe _timeout which is also used by the watchdog timer.
	175 logging.info('Timeout for %s test: %s', test, timeout)

	176

	177 logfile = sys.stdout

	178 if self._test_instance.single_step:

	179 logfile = _HeartBeatLogger()

	180 cmd = self._CreateCmd(test)

	181 self._WriteBuildBotJson()

	182 cwd = os.path.abspath(host_paths.DIR_SOURCE_ROOT)

	183 if cmd.startswith('src/'):

	184 cwd = os.path.abspath(os.path.join(host_paths.DIR_SOURCE_ROOT, os.pardir))
	mikecase (-- gone --) 2016/06/01 17:40:28 Im not a fan of things like this. Probably would b Im not a fan of things like this. Probably would be nice to work to remove the need for this. So some tests just have to be run from a certain directory? rnephew (Reviews Here) 2016/06/01 20:32:05 This is from the old test runner. I want to eventu Show quoted text On 2016/06/01 17:40:28, mikecase wrote: > Im not a fan of things like this. Probably would be nice to work to remove the > need for this. So some tests just have to be run from a certain directory? This is from the old test runner. I want to eventually remove this, but do not want to break anything. I think I will add a logging message that this is triggered, and fix any place that uses it. jbudorick 2016/06/01 20:46:06 In general, I'd prefer to make these breaking sani Show quoted text On 2016/06/01 20:32:05, rnephew (Reviews Here) wrote: > On 2016/06/01 17:40:28, mikecase wrote: > > Im not a fan of things like this. Probably would be nice to work to remove the > > need for this. So some tests just have to be run from a certain directory? > > This is from the old test runner. I want to eventually remove this, but do not > want to break anything. I think I will add a logging message that this is > triggered, and fix any place that uses it. In general, I'd prefer to make these breaking sanity changes/fixes in the transition to platform mode. rnephew (Reviews Here) 2016/06/01 22:14:14 Done. Show quoted text On 2016/06/01 20:46:06, jbudorick wrote: > On 2016/06/01 20:32:05, rnephew (Reviews Here) wrote: > > On 2016/06/01 17:40:28, mikecase wrote: > > > Im not a fan of things like this. Probably would be nice to work to remove > the > > > need for this. So some tests just have to be run from a certain directory? > > > > This is from the old test runner. I want to eventually remove this, but do not > > want to break anything. 
I think I will add a logging message that this is > > triggered, and fix any place that uses it. > > In general, I'd prefer to make these breaking sanity changes/fixes in the > transition to platform mode. Done.
	185

	186 try:

	187 logging.debug('Running test with command \'%s\'', cmd)

	188 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(

	189 cmd, timeout, cwd=cwd, shell=True, logfile=logfile)

	190 json_output = self._ReadChartjsonOutput()

	191 except cmd_helper.TimeoutError as e:

	192 exit_code = -1

	193 output = e.output

	194 json_output = ''

	195 finally:

	196 if self._test_instance.single_step:

	197 logfile.stop()

	198 return cmd, exit_code, output, json_output

	199

	200 def _ProcessTestResult(

	201 self, test, cmd, start_time, end_time, exit_code, output, json_output):

	202 if exit_code is None:

	203 exit_code = -1

	204 logging.info('%s : exit_code=%d in %d secs on device %s',

	205 test, exit_code, end_time - start_time,

	206 str(self._device))

	207 if exit_code == 0:

	208 result_type = base_test_result.ResultType.PASS

	209 else:

	210 result_type = base_test_result.ResultType.FAIL

	211 # TODO(rnephew): Improve device recovery logic.

	212 try:

	213 self._device.WaitUntilFullyBooted(timeout=120)
	mikecase (-- gone --) 2016/06/01 17:40:27 We should probably have some shared GetDeviceBootT We should probably have some shared GetDeviceBootTimeout function in constants.py that looks at device type and Android version and just returns a good timeout. At least, for the time being, would be nice to factor the 120 out to the top of the module. rnephew (Reviews Here) 2016/06/01 20:32:04 Done. Show quoted text On 2016/06/01 17:40:27, mikecase wrote: > We should probably have some shared GetDeviceBootTimeout function in > constants.py that > looks at device type and Android version and just returns a good timeout. > > At least, for the time being, would be nice to factor the 120 out to the top of > the module. Done.
	214 except device_errors.CommandTimeoutError:

	215 logging.exception('Device failed to return after %s.', test)

	216 actual_exit_code = exit_code

	217 if (self._test_instance.flaky_steps

	218 and test in self._test_instance.flaky_steps):

	219 exit_code = 0

	220 archive_bytes = (self._ArchiveOutputDir()

	221 if self._tests[test].get('archive_output_dir')

	222 else None)

	223 persisted_result = {

	224 'name': test,

	225 'output': [output],

	226 'chartjson': json_output,

	227 'archive_bytes': archive_bytes,

	228 'exit_code': exit_code,

	229 'actual_exit_code': actual_exit_code,

	230 'result_type': result_type,

	231 'start_time': start_time,

	232 'end_time': end_time,

	233 'total_time': end_time - start_time,

	234 'device': str(self._device),

	235 'cmd': cmd,

	236 }

	237 self._SaveResult(persisted_result)

	238 return result_type

	239

	240 def RunTestsOnShard(self):

	241 for test in self._tests:

	242 self._TestSetUp()

	243

	244 try:

	245 exit_code = None

	246 tries_left = self._retries

	247

	248 while exit_code != 0 and tries_left > 0:

	249 tries_left = tries_left - 1

	250 start_time = time.time()

	251 cmd, exit_code, output, json_output = self._RunSingleTest(test)

	252 end_time = time.time()

	253 result_type = self._ProcessTestResult(

	254 test, cmd, start_time, end_time, exit_code, output, json_output)

	255

	256 result = base_test_result.TestRunResults()

	257 result.AddResult(base_test_result.BaseTestResult(test, result_type))

	258 self._results.append(result)

	259 finally:

	260 self._CleanupOutputDirectory()

	261

	262 @staticmethod

	263 def _SaveResult(result):

	264 pickled = os.path.join(constants.PERF_OUTPUT_DIR, result['name'])

	265 if os.path.exists(pickled):

	266 with file(pickled, 'r') as f:

	267 previous = pickle.loads(f.read())

	268 result['output'] = previous['output'] + result['output']
	mikecase (-- gone --) 2016/06/01 17:40:28 Do this save files ever get cleared? It looks like Do this save files ever get cleared? It looks like they will just keep on growing forever. Is that a problem? rnephew (Reviews Here) 2016/06/01 20:32:05 That.. that is a bug. It should be deleted in the Show quoted text On 2016/06/01 17:40:28, mikecase wrote: > Do this save files ever get cleared? It looks like they will just keep on > growing forever. Is that a problem? That.. that is a bug. It should be deleted in the test_run_instance setup.
	269 with file(pickled, 'w') as f:

	270 f.write(pickle.dumps(result))

	271

	272 def _ArchiveOutputDir(self):

	273 """Archive all files in the output dir, and return as compressed bytes."""

	274 with io.BytesIO() as archive:

	275 with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as contents:

	276 num_files = 0

	277 for absdir, _, files in os.walk(self._output_dir):

	278 reldir = os.path.relpath(absdir, self._output_dir)

	279 for filename in files:

	280 src_path = os.path.join(absdir, filename)

	281 # We use normpath to turn './file.txt' into just 'file.txt'.

	282 dst_path = os.path.normpath(os.path.join(reldir, filename))

	283 contents.write(src_path, dst_path)

	284 num_files += 1

	285 if num_files:

	286 logging.info('%d files in the output dir were archived.', num_files)

	287 else:

	288 logging.warning('No files in the output dir. Archive is empty.')

	289 return archive.getvalue()

	290

	291 def _ResetWatcher(self):

	292 if self._watcher:

	293 self._watcher.Reset()

	294

	295 def _BatteryLevelCheck(self):

	296 logging.info('Charge level: %s%%',

	297 str(self._battery.GetBatteryInfo().get('level')))

	298 if self._test_instance.min_battery_level:

	299 self._battery.ChargeDeviceToLevel(self._test_instance.min_battery_level)

	300

	301 def _ScreenCheck(self):

	302 if not self._device.IsScreenOn():

	303 self._device.SetScreen(True)

	304

	305 def _BatteryTempCheck(self):

	306 logging.info('temperature: %s (0.1 C)',

	307 str(self._battery.GetBatteryInfo().get('temperature')))

	308 if self._test_instance.max_battery_temp:

	309 self._battery.LetBatteryCoolToTemperature(

	310 self._test_instance.max_battery_temp)

	311

	312

	313 class LocalDevicePerfTestRun(local_device_test_run.LocalDeviceTestRun):

	314 def __init__(self, env, test_instance):

	315 super(LocalDevicePerfTestRun, self).__init__(env, test_instance)

	316 self._test_instance = test_instance

	317 self._env = env

	318 self._timeout = 10 * 60 # Ten minutes

	319 self._devices = None

	320 self._test_buckets = []

	321 self._watcher = None

	322

	323 def SetUp(self):

	324 self._devices = self._GetAllDevices(self._env.devices,

	325 self._test_instance.known_devices_file)

	326 self._watcher = watchdog_timer.WatchdogTimer(self._timeout)

	327

  def TearDown(self):
    """Do nothing; this run has no teardown work."""
    pass

	330

	331 def _GetStepsFromDict(self):

	332 if self._test_instance.single_step:

	333 return {

	334 'version': 1,

	335 'steps': {

	336 'single_step': {

	337 'device_affinity': 0,

	338 'cmd': self._test_instance.single_step

	339 },

	340 }

	341 }

	342 if self._test_instance.steps:

	343 with file(self._test_instance.steps, 'r') as f:

	344 steps = json.load(f)

	345 assert steps['version'] == 1

	346 return steps

	347

	348 def _SplitTestsByAffinity(self):

	349 test_dict = self._GetStepsFromDict()

	350 for test in test_dict['steps']:

	351 affinity = test_dict['steps'][test]['device_affinity']

	352 if len(self._test_buckets) < affinity + 1:

	353 while len(self._test_buckets) != affinity + 1:

	354 self._test_buckets.append({})

	355 self._test_buckets[affinity][test] = test_dict['steps'][test]

	356 return self._test_buckets

	357

	358 @staticmethod

	359 def _GetAllDevices(active_devices, devices_path):

	360 if not devices_path:

	361 logging.warning('Known devices file path not being passed. For device '

	362 'affinity to work properly, it must be passed.')

	363 try:

	364 if devices_path:

	365 devices = [device_utils.DeviceUtils(s)

	366 for s in device_list.GetPersistentDeviceList(devices_path)]

	367 else:

	368 logging.warning('Known devices file path not being passed. For device '

	369 'affinity to work properly, it must be passed.')

	370 devices = active_devices

	371 except IOError as e:

	372 logging.error('Unable to find %s [%s]', devices_path, e)

	373 devices = active_devices

	374 return sorted(devices)

	375

	376

	377 def RunTests(self):

	378 # Option selected for saving a json file with a list of test names.

	379 if self._test_instance.output_json_list:

	380 return self._test_instance.OutputJsonList()

	381

	382 # Just print the results from a single previously executed step.

	383 if self._test_instance.print_step:

	384 return self._test_instance.PrintTestOutput()

	385

	386 # Affinitize the tests.

	387 test_buckets = self._SplitTestsByAffinity()

	388 if not test_buckets:

	389 raise NotImplementedError('No tests found!')

	390

	391 threads = []

	392 results = []

	393 for x in xrange(len(self._devices)):

	394 new_shard = TestShard(self._test_instance, self._devices[x], x,

	395 test_buckets[x], results, watcher=self._watcher)

	396 threads.append(reraiser_thread.ReraiserThread(new_shard.RunTestsOnShard))

	397

	398 workers = reraiser_thread.ReraiserThreadGroup(threads)

	399 workers.StartAll()

	400

	401 try:

	402 workers.JoinAll(self._watcher)

	403 except device_errors.CommandFailedError:

	404 logging.exception('Command failed on device.')

	405 except device_errors.CommandTimeoutError:

	406 logging.exception('Command timed out on device.')

	407 except device_errors.DeviceUnreachableError:

	408 logging.exception('Device became unreachable.')

	409 return results

	410

	411 # override

  def TestPackage(self):
    """Return the test package name for this run, 'Perf'."""
    return 'Perf'

	414

	415 # override

  def _CreateShards(self, _tests):
    """Not supported by this run; always raises NotImplementedError."""
    raise NotImplementedError

	418

	419 # override

  def _GetTests(self):
    """Return the test buckets built by _SplitTestsByAffinity()."""
    return self._test_buckets

	422

	423 # override

  def _RunTest(self, _device, _test):
    """Not supported by this run; always raises NotImplementedError."""
    raise NotImplementedError

	426

	427 # override

  def _ShouldShard(self):
    """Return False."""
    return False

OLD	NEW