Index: build/android/buildbot/bb_device_status_check.py |
diff --git a/build/android/buildbot/bb_device_status_check.py b/build/android/buildbot/bb_device_status_check.py |
index 917c51e28dcce8471d802e5ccbc16d380f283d84..a65490fbbf5164d1fa400d96cbfea9611b4421bd 100755 |
--- a/build/android/buildbot/bb_device_status_check.py |
+++ b/build/android/buildbot/bb_device_status_check.py |
@@ -124,64 +124,80 @@ def CheckForMissingDevices(options, devices): |
out_dir = os.path.abspath(options.out_dir) |
device_serials = set(d.adb.GetDeviceSerial() for d in devices) |
- # last_devices denotes all known devices prior to this run |
+ # last_devices maps each known device serial to the number of consecutive |
+ # runs it had been missing as of the previous run (0 means it was online) |
navabi
2015/05/27 23:36:19
It looks like it denotes a count map. The comment
luqui
2015/05/28 00:59:54
Done.
|
last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME) |
- last_missing_devices_path = os.path.join(out_dir, |
- device_list.LAST_MISSING_DEVICES_FILENAME) |
try: |
- last_devices = device_list.GetPersistentDeviceList(last_devices_path) |
+ last_devices = device_list.ReadDeviceOfflineCountMap(last_devices_path) |
except IOError: |
# Ignore error, file might not exist |
- last_devices = [] |
+ last_devices = {} |
+ |
+ |
+ # Zero the count for devices seen this run; increment it for missing ones. |
navabi
2015/05/27 23:36:20
this comment is also confusing. Makes it sound lik
luqui
2015/05/28 00:59:54
Done.
|
+ def freshen_device(k): |
+ if k in device_serials: |
+ return 0 |
+ else: |
+ return last_devices[k] + 1 |
+ last_devices = {k: freshen_device(k) for k in last_devices} |
+ |
+ missing_devices = {k: v for k, v in last_devices.iteritems() if v != 0} |
+ if missing_devices: |
+ logging.info('Missing devices: %s' % missing_devices) |
+ |
+ # Warn about devices that are missing once, but take no further action. |
+ # This is because sometimes devices are still rebooting when we check. |
+ once_missing = [k for k, v in last_devices.iteritems() if v == 1] |
+ if once_missing: |
+ bb_annotations.PrintSummaryText( |
+ '%d devices missing since last run' % len(once_missing)) |
+ |
+ # Send an email for twice missing devices. This indicates a real problem. |
+ twice_missing = [k for k, v in last_devices.iteritems() if v == 2] |
+ if twice_missing: |
+ bb_annotations.PrintSummaryText( |
+ '%s devices missing for two runs -- notifying' % len(twice_missing)) |
+ if os.environ.get('BUILDBOT_SLAVENAME'): |
+ from_address = 'chrome-bot@chromium.org' |
+ to_addresses = ['chrome-labs-tech-ticket@google.com', |
+ 'chrome-android-device-alert@google.com'] |
+ cc_addresses = ['chrome-android-device-alert@google.com'] |
+ subject = 'Devices offline on %s, %s, %s' % ( |
+ os.environ.get('BUILDBOT_SLAVENAME'), |
+ os.environ.get('BUILDBOT_BUILDERNAME'), |
+ os.environ.get('BUILDBOT_BUILDNUMBER')) |
+ msg = ('Please reboot the following devices:\n%s' % |
+ '\n'.join(map(str, twice_missing))) |
+ SendEmail(from_address, to_addresses, cc_addresses, subject, msg) |
+ |
+ quite_missing = [k for k, v in last_devices.iteritems() if v > 2] |
+ if quite_missing: |
+ bb_annotations.PrintSummaryTest( |
+ '%s devices missing for more than two runs' % len(quite_missing)) |
- try: |
- last_missing_devices = device_list.GetPersistentDeviceList( |
- last_missing_devices_path) |
- except IOError: |
- last_missing_devices = [] |
- |
- missing_devs = list(set(last_devices) - device_serials) |
- new_missing_devs = list(set(missing_devs) - set(last_missing_devices)) |
- |
- if new_missing_devs and os.environ.get('BUILDBOT_SLAVENAME'): |
- logging.info('new_missing_devs %s' % new_missing_devs) |
- devices_missing_msg = '%d devices not detected.' % len(missing_devs) |
- bb_annotations.PrintSummaryText(devices_missing_msg) |
- |
- from_address = 'chrome-bot@chromium.org' |
- to_addresses = ['chrome-labs-tech-ticket@google.com', |
- 'chrome-android-device-alert@google.com'] |
- cc_addresses = ['chrome-android-device-alert@google.com'] |
- subject = 'Devices offline on %s, %s, %s' % ( |
- os.environ.get('BUILDBOT_SLAVENAME'), |
- os.environ.get('BUILDBOT_BUILDERNAME'), |
- os.environ.get('BUILDBOT_BUILDNUMBER')) |
- msg = ('Please reboot the following devices:\n%s' % |
- '\n'.join(map(str, new_missing_devs))) |
- SendEmail(from_address, to_addresses, cc_addresses, subject, msg) |
- |
- all_known_devices = list(device_serials | set(last_devices)) |
- device_list.WritePersistentDeviceList(last_devices_path, all_known_devices) |
- device_list.WritePersistentDeviceList(last_missing_devices_path, missing_devs) |
- |
- if not all_known_devices: |
- # This can happen if for some reason the .last_devices file is not |
- # present or if it was empty. |
- return ['No online devices. Have any devices been plugged in?'] |
- if missing_devs: |
- devices_missing_msg = '%d devices not detected.' % len(missing_devs) |
- bb_annotations.PrintSummaryText(devices_missing_msg) |
- return ['Current online devices: %s' % ', '.join(d for d in device_serials), |
- '%s are no longer visible. Were they removed?' % missing_devs] |
+ if not devices: |
+ # This can happen if for some reason the .last_devices file is not |
+ # present or if it was empty. |
+ return ['No online devices. Have any devices been plugged in?'] |
+ if missing_devices: |
+ return ['Current online devices: %s' % device_serials, |
+ '%s are no longer visible. Were they removed?\n' % |
+ missing_devices.keys()] |
else: |
- new_devs = device_serials - set(last_devices) |
- if new_devs and os.path.exists(last_devices_path): |
+ new_devices = [k for k in device_serials if k not in last_devices] |
+ if new_devices and os.path.exists(last_devices_path): |
bb_annotations.PrintWarning() |
bb_annotations.PrintSummaryText( |
- '%d new devices detected' % len(new_devs)) |
- logging.info('New devices detected:') |
- for d in new_devs: |
+ '%d new devices detected' % len(new_devices)) |
+ logging.info('%s new devices detected:' % len(new_devices)) |
+ for d in new_devices: |
logging.info(' %s', d) |
+ # Reset last_devices since we have probably seen admin intervention, so |
+ # we don't keep warning about the same old stuff. |
+ last_devices = {k: 0 for k in device_serials} |
+ |
+ device_list.WriteDeviceOfflineCountMap(last_devices_path, last_devices) |
def SendEmail(from_address, to_addresses, cc_addresses, subject, msg): |
@@ -282,8 +298,10 @@ def main(): |
device_blacklist.ResetBlacklist() |
try: |
- expected_devices = device_list.GetPersistentDeviceList( |
- os.path.join(options.out_dir, device_list.LAST_DEVICES_FILENAME)) |
+ last_devices_path = os.path.join( |
+ options.out_dir, device_list.LAST_DEVICES_FILENAME) |
+ expected_devices = device_list.ReadDeviceOfflineCountMap( |
+ last_devices_path).keys() |
except IOError: |
expected_devices = [] |
devices = device_utils.DeviceUtils.HealthyDevices() |