Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: build/android/buildbot/bb_device_status_check.py

Issue 1148873007: Fix last_devices to be quieter, and improve device affinity. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Sort devices again for reboot-stability Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | build/android/pylib/device/device_list.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # 2 #
3 # Copyright 2013 The Chromium Authors. All rights reserved. 3 # Copyright 2013 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 5 # found in the LICENSE file.
6 6
7 """A class to keep track of devices across builds and report state.""" 7 """A class to keep track of devices across builds and report state."""
8 import json 8 import json
9 import logging 9 import logging
10 import optparse 10 import optparse
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
117 117
118 Args: 118 Args:
119 options: out_dir parameter of options argument is used as the base 119 options: out_dir parameter of options argument is used as the base
120 directory to load and update the cache file. 120 directory to load and update the cache file.
121 devices: A list of DeviceUtils instance for the currently visible and 121 devices: A list of DeviceUtils instance for the currently visible and
122 online attached devices. 122 online attached devices.
123 """ 123 """
124 out_dir = os.path.abspath(options.out_dir) 124 out_dir = os.path.abspath(options.out_dir)
125 device_serials = set(d.adb.GetDeviceSerial() for d in devices) 125 device_serials = set(d.adb.GetDeviceSerial() for d in devices)
126 126
127 # last_devices denotes all known devices prior to this run 127 # last_devices denotes all known devices since the last time a new device was
128 # detected, mapped to the number of times it has been seen offline
129 # contiguously.
128 last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME) 130 last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME)
129 last_missing_devices_path = os.path.join(out_dir,
130 device_list.LAST_MISSING_DEVICES_FILENAME)
131 try: 131 try:
132 last_devices = device_list.GetPersistentDeviceList(last_devices_path) 132 last_devices = device_list.ReadDeviceOfflineCountMap(last_devices_path)
133 except IOError: 133 except IOError:
134 # Ignore error, file might not exist 134 # Ignore error, file might not exist.
135 last_devices = [] 135 last_devices = {}
136 136
137 try:
138 last_missing_devices = device_list.GetPersistentDeviceList(
139 last_missing_devices_path)
140 except IOError:
141 last_missing_devices = []
142 137
143 missing_devs = list(set(last_devices) - device_serials) 138 # Add 1 to the offline count for each device currently offline; reset the
144 new_missing_devs = list(set(missing_devs) - set(last_missing_devices)) 139 # rest.
140 def freshen_device(k):
141 if k in device_serials:
142 return 0
143 else:
144 return last_devices[k] + 1
145 last_devices = {k: freshen_device(k) for k in last_devices}
145 146
146 if new_missing_devs and os.environ.get('BUILDBOT_SLAVENAME'): 147 missing_devices = {k: v for k, v in last_devices.iteritems() if v != 0}
147 logging.info('new_missing_devs %s' % new_missing_devs) 148 if missing_devices:
148 devices_missing_msg = '%d devices not detected.' % len(missing_devs) 149 logging.info('Missing devices: %s' % missing_devices)
149 bb_annotations.PrintSummaryText(devices_missing_msg)
150 150
151 from_address = 'chrome-bot@chromium.org' 151 # Warn about devices that are missing once, but take no further action.
152 to_addresses = ['chrome-labs-tech-ticket@google.com', 152 # This is because sometimes devices are still rebooting when we check.
153 'chrome-android-device-alert@google.com'] 153 once_missing = [k for k, v in last_devices.iteritems() if v == 1]
154 cc_addresses = ['chrome-android-device-alert@google.com'] 154 if once_missing:
155 subject = 'Devices offline on %s, %s, %s' % ( 155 bb_annotations.PrintSummaryText(
156 os.environ.get('BUILDBOT_SLAVENAME'), 156 '%d devices missing since last run' % len(once_missing))
157 os.environ.get('BUILDBOT_BUILDERNAME'),
158 os.environ.get('BUILDBOT_BUILDNUMBER'))
159 msg = ('Please reboot the following devices:\n%s' %
160 '\n'.join(map(str, new_missing_devs)))
161 SendEmail(from_address, to_addresses, cc_addresses, subject, msg)
162 157
163 all_known_devices = list(device_serials | set(last_devices)) 158 # Send an email for twice missing devices. This indicates a real problem.
164 device_list.WritePersistentDeviceList(last_devices_path, all_known_devices) 159 twice_missing = [k for k, v in last_devices.iteritems() if v == 2]
165 device_list.WritePersistentDeviceList(last_missing_devices_path, missing_devs) 160 if twice_missing:
161 bb_annotations.PrintSummaryText(
162 '%s devices missing for two runs -- notifying' % len(twice_missing))
163 if os.environ.get('BUILDBOT_SLAVENAME'):
164 from_address = 'chrome-bot@chromium.org'
165 to_addresses = ['chrome-labs-tech-ticket@google.com',
166 'chrome-android-device-alert@google.com']
167 cc_addresses = ['chrome-android-device-alert@google.com']
168 subject = 'Devices offline on %s, %s, %s' % (
169 os.environ.get('BUILDBOT_SLAVENAME'),
170 os.environ.get('BUILDBOT_BUILDERNAME'),
171 os.environ.get('BUILDBOT_BUILDNUMBER'))
172 msg = ('Please reboot the following devices:\n%s' %
173 '\n'.join(map(str, twice_missing)))
174 SendEmail(from_address, to_addresses, cc_addresses, subject, msg)
166 175
167 if not all_known_devices: 176 quite_missing = [k for k, v in last_devices.iteritems() if v > 2]
168 # This can happen if for some reason the .last_devices file is not 177 if quite_missing:
169 # present or if it was empty. 178 bb_annotations.PrintSummaryTest(
170 return ['No online devices. Have any devices been plugged in?'] 179 '%s devices missing for more than two runs' % len(quite_missing))
171 if missing_devs: 180
172 devices_missing_msg = '%d devices not detected.' % len(missing_devs) 181 if not devices:
173 bb_annotations.PrintSummaryText(devices_missing_msg) 182 # This can happen if for some reason the .last_devices file is not
174 return ['Current online devices: %s' % ', '.join(d for d in device_serials), 183 # present or if it was empty.
175 '%s are no longer visible. Were they removed?' % missing_devs] 184 return ['No online devices. Have any devices been plugged in?']
185 if missing_devices:
186 return ['Current online devices: %s' % device_serials,
187 '%s are no longer visible. Were they removed?\n' %
188 missing_devices.keys()]
176 else: 189 else:
177 new_devs = device_serials - set(last_devices) 190 new_devices = [k for k in device_serials if k not in last_devices]
178 if new_devs and os.path.exists(last_devices_path): 191 if new_devices and os.path.exists(last_devices_path):
179 bb_annotations.PrintWarning() 192 bb_annotations.PrintWarning()
180 bb_annotations.PrintSummaryText( 193 bb_annotations.PrintSummaryText(
181 '%d new devices detected' % len(new_devs)) 194 '%d new devices detected' % len(new_devices))
182 logging.info('New devices detected:') 195 logging.info('%s new devices detected:' % len(new_devices))
183 for d in new_devs: 196 for d in new_devices:
184 logging.info(' %s', d) 197 logging.info(' %s', d)
198 # Reset last_devices since we have probably seen admin intervention, so
199 # we don't keep warning about the same old stuff.
200 last_devices = {k: 0 for k in device_serials}
201
202 device_list.WriteDeviceOfflineCountMap(last_devices_path, last_devices)
185 203
186 204
187 def SendEmail(from_address, to_addresses, cc_addresses, subject, msg): 205 def SendEmail(from_address, to_addresses, cc_addresses, subject, msg):
188 msg_body = '\r\n'.join(['From: %s' % from_address, 206 msg_body = '\r\n'.join(['From: %s' % from_address,
189 'To: %s' % ', '.join(to_addresses), 207 'To: %s' % ', '.join(to_addresses),
190 'CC: %s' % ', '.join(cc_addresses), 208 'CC: %s' % ', '.join(cc_addresses),
191 'Subject: %s' % subject, '', msg]) 209 'Subject: %s' % subject, '', msg])
192 try: 210 try:
193 server = smtplib.SMTP('localhost') 211 server = smtplib.SMTP('localhost')
194 server.sendmail(from_address, to_addresses, msg_body) 212 server.sendmail(from_address, to_addresses, msg_body)
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
275 options, args = parser.parse_args() 293 options, args = parser.parse_args()
276 if args: 294 if args:
277 parser.error('Unknown options %s' % args) 295 parser.error('Unknown options %s' % args)
278 296
279 run_tests_helper.SetLogLevel(options.verbose) 297 run_tests_helper.SetLogLevel(options.verbose)
280 298
281 # Remove the last build's "bad devices" before checking device statuses. 299 # Remove the last build's "bad devices" before checking device statuses.
282 device_blacklist.ResetBlacklist() 300 device_blacklist.ResetBlacklist()
283 301
284 try: 302 try:
285 expected_devices = device_list.GetPersistentDeviceList( 303 last_devices_path = os.path.join(
286 os.path.join(options.out_dir, device_list.LAST_DEVICES_FILENAME)) 304 options.out_dir, device_list.LAST_DEVICES_FILENAME)
305 expected_devices = device_list.ReadDeviceOfflineCountMap(
306 last_devices_path).keys()
287 except IOError: 307 except IOError:
288 expected_devices = [] 308 expected_devices = []
289 devices = device_utils.DeviceUtils.HealthyDevices() 309 devices = device_utils.DeviceUtils.HealthyDevices()
290 device_serials = [d.adb.GetDeviceSerial() for d in devices] 310 device_serials = [d.adb.GetDeviceSerial() for d in devices]
291 # Only restart usb if devices are missing. 311 # Only restart usb if devices are missing.
292 if set(expected_devices) != set(device_serials): 312 if set(expected_devices) != set(device_serials):
293 logging.warning('expected_devices: %s', expected_devices) 313 logging.warning('expected_devices: %s', expected_devices)
294 logging.warning('devices: %s', device_serials) 314 logging.warning('devices: %s', device_serials)
295 KillAllAdb() 315 KillAllAdb()
296 retries = 5 316 retries = 5
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
395 415
396 if num_failed_devs == len(devices): 416 if num_failed_devs == len(devices):
397 return 2 417 return 2
398 418
399 if not devices: 419 if not devices:
400 return 1 420 return 1
401 421
402 422
403 if __name__ == '__main__': 423 if __name__ == '__main__':
404 sys.exit(main()) 424 sys.exit(main())
OLD | NEW
« no previous file with comments | « no previous file | build/android/pylib/device/device_list.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698