Chromium Code Reviews| Index: build/android/buildbot/bb_device_status_check.py |
| diff --git a/build/android/buildbot/bb_device_status_check.py b/build/android/buildbot/bb_device_status_check.py |
| index 917c51e28dcce8471d802e5ccbc16d380f283d84..a65490fbbf5164d1fa400d96cbfea9611b4421bd 100755 |
| --- a/build/android/buildbot/bb_device_status_check.py |
| +++ b/build/android/buildbot/bb_device_status_check.py |
| @@ -124,64 +124,80 @@ def CheckForMissingDevices(options, devices): |
| out_dir = os.path.abspath(options.out_dir) |
| device_serials = set(d.adb.GetDeviceSerial() for d in devices) |
| - # last_devices denotes all known devices prior to this run |
| + # last_devices denotes all known devices since the last time a new device was |
| + # detected |
|
navabi
2015/05/27 23:36:19
It looks like it denotes a count map. The comment
luqui
2015/05/28 00:59:54
Done.
|
| last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME) |
| - last_missing_devices_path = os.path.join(out_dir, |
| - device_list.LAST_MISSING_DEVICES_FILENAME) |
| try: |
| - last_devices = device_list.GetPersistentDeviceList(last_devices_path) |
| + last_devices = device_list.ReadDeviceOfflineCountMap(last_devices_path) |
| except IOError: |
| # Ignore error, file might not exist |
| - last_devices = [] |
| + last_devices = {} |
| + |
| + |
| + # Increment the count of missing devices and add new devices to the map. |
|
navabi
2015/05/27 23:36:20
this comment is also confusing. Makes it sound lik
luqui
2015/05/28 00:59:54
Done.
|
| + def freshen_device(k): |
| + if k in device_serials: |
| + return 0 |
| + else: |
| + return last_devices[k] + 1 |
| + last_devices = {k: freshen_device(k) for k in last_devices} |
| + |
| + missing_devices = {k: v for k, v in last_devices.iteritems() if v != 0} |
| + if missing_devices: |
| + logging.info('Missing devices: %s' % missing_devices) |
| + |
| + # Warn about devices that are missing once, but take no further action. |
| + # This is because sometimes devices are still rebooting when we check. |
| + once_missing = [k for k, v in last_devices.iteritems() if v == 1] |
| + if once_missing: |
| + bb_annotations.PrintSummaryText( |
| + '%d devices missing since last run' % len(once_missing)) |
| + |
| + # Send an email for twice missing devices. This indicates a real problem. |
| + twice_missing = [k for k, v in last_devices.iteritems() if v == 2] |
| + if twice_missing: |
| + bb_annotations.PrintSummaryText( |
| + '%s devices missing for two runs -- notifying' % len(twice_missing)) |
| + if os.environ.get('BUILDBOT_SLAVENAME'): |
| + from_address = 'chrome-bot@chromium.org' |
| + to_addresses = ['chrome-labs-tech-ticket@google.com', |
| + 'chrome-android-device-alert@google.com'] |
| + cc_addresses = ['chrome-android-device-alert@google.com'] |
| + subject = 'Devices offline on %s, %s, %s' % ( |
| + os.environ.get('BUILDBOT_SLAVENAME'), |
| + os.environ.get('BUILDBOT_BUILDERNAME'), |
| + os.environ.get('BUILDBOT_BUILDNUMBER')) |
| + msg = ('Please reboot the following devices:\n%s' % |
| + '\n'.join(map(str, twice_missing))) |
| + SendEmail(from_address, to_addresses, cc_addresses, subject, msg) |
| + |
| + quite_missing = [k for k, v in last_devices.iteritems() if v > 2] |
| + if quite_missing: |
| + bb_annotations.PrintSummaryText( |
| + '%s devices missing for more than two runs' % len(quite_missing)) |
| - try: |
| - last_missing_devices = device_list.GetPersistentDeviceList( |
| - last_missing_devices_path) |
| - except IOError: |
| - last_missing_devices = [] |
| - |
| - missing_devs = list(set(last_devices) - device_serials) |
| - new_missing_devs = list(set(missing_devs) - set(last_missing_devices)) |
| - |
| - if new_missing_devs and os.environ.get('BUILDBOT_SLAVENAME'): |
| - logging.info('new_missing_devs %s' % new_missing_devs) |
| - devices_missing_msg = '%d devices not detected.' % len(missing_devs) |
| - bb_annotations.PrintSummaryText(devices_missing_msg) |
| - |
| - from_address = 'chrome-bot@chromium.org' |
| - to_addresses = ['chrome-labs-tech-ticket@google.com', |
| - 'chrome-android-device-alert@google.com'] |
| - cc_addresses = ['chrome-android-device-alert@google.com'] |
| - subject = 'Devices offline on %s, %s, %s' % ( |
| - os.environ.get('BUILDBOT_SLAVENAME'), |
| - os.environ.get('BUILDBOT_BUILDERNAME'), |
| - os.environ.get('BUILDBOT_BUILDNUMBER')) |
| - msg = ('Please reboot the following devices:\n%s' % |
| - '\n'.join(map(str, new_missing_devs))) |
| - SendEmail(from_address, to_addresses, cc_addresses, subject, msg) |
| - |
| - all_known_devices = list(device_serials | set(last_devices)) |
| - device_list.WritePersistentDeviceList(last_devices_path, all_known_devices) |
| - device_list.WritePersistentDeviceList(last_missing_devices_path, missing_devs) |
| - |
| - if not all_known_devices: |
| - # This can happen if for some reason the .last_devices file is not |
| - # present or if it was empty. |
| - return ['No online devices. Have any devices been plugged in?'] |
| - if missing_devs: |
| - devices_missing_msg = '%d devices not detected.' % len(missing_devs) |
| - bb_annotations.PrintSummaryText(devices_missing_msg) |
| - return ['Current online devices: %s' % ', '.join(d for d in device_serials), |
| - '%s are no longer visible. Were they removed?' % missing_devs] |
| + if not devices: |
| + # This can happen if for some reason the .last_devices file is not |
| + # present or if it was empty. |
| + return ['No online devices. Have any devices been plugged in?'] |
| + if missing_devices: |
| + return ['Current online devices: %s' % device_serials, |
| + '%s are no longer visible. Were they removed?\n' % |
| + missing_devices.keys()] |
| else: |
| - new_devs = device_serials - set(last_devices) |
| - if new_devs and os.path.exists(last_devices_path): |
| + new_devices = [k for k in device_serials if k not in last_devices] |
| + if new_devices and os.path.exists(last_devices_path): |
| bb_annotations.PrintWarning() |
| bb_annotations.PrintSummaryText( |
| - '%d new devices detected' % len(new_devs)) |
| - logging.info('New devices detected:') |
| - for d in new_devs: |
| + '%d new devices detected' % len(new_devices)) |
| + logging.info('%s new devices detected:' % len(new_devices)) |
| + for d in new_devices: |
| logging.info(' %s', d) |
| + # Reset last_devices since we have probably seen admin intervention, so |
| + # we don't keep warning about the same old stuff. |
| + last_devices = {k: 0 for k in device_serials} |
| + |
| + device_list.WriteDeviceOfflineCountMap(last_devices_path, last_devices) |
| def SendEmail(from_address, to_addresses, cc_addresses, subject, msg): |
| @@ -282,8 +298,10 @@ def main(): |
| device_blacklist.ResetBlacklist() |
| try: |
| - expected_devices = device_list.GetPersistentDeviceList( |
| - os.path.join(options.out_dir, device_list.LAST_DEVICES_FILENAME)) |
| + last_devices_path = os.path.join( |
| + options.out_dir, device_list.LAST_DEVICES_FILENAME) |
| + expected_devices = device_list.ReadDeviceOfflineCountMap( |
| + last_devices_path).keys() |
| except IOError: |
| expected_devices = [] |
| devices = device_utils.DeviceUtils.HealthyDevices() |