Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: build/android/buildbot/bb_device_status_check.py

Issue 1148873007: Fix last_devices to be quieter, and improve device affinity. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Improve diagram Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # 2 #
3 # Copyright 2013 The Chromium Authors. All rights reserved. 3 # Copyright 2013 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 5 # found in the LICENSE file.
6 6
7 """A class to keep track of devices across builds and report state.""" 7 """A class to keep track of devices across builds and report state."""
8 import json 8 import json
9 import logging 9 import logging
10 import optparse 10 import optparse
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
117 117
118 Args: 118 Args:
119 options: out_dir parameter of options argument is used as the base 119 options: out_dir parameter of options argument is used as the base
120 directory to load and update the cache file. 120 directory to load and update the cache file.
121 devices: A list of DeviceUtils instances for the currently visible and 121 devices: A list of DeviceUtils instances for the currently visible and
122 online attached devices. 122 online attached devices.
123 """ 123 """
124 out_dir = os.path.abspath(options.out_dir) 124 out_dir = os.path.abspath(options.out_dir)
125 device_serials = set(d.adb.GetDeviceSerial() for d in devices) 125 device_serials = set(d.adb.GetDeviceSerial() for d in devices)
126 126
127 # last_devices denotes all known devices prior to this run 127 # last_devices denotes all known devices since the last time a new device was
128 # detected
navabi 2015/05/27 23:36:19 It looks like it denotes a count map. The comment
luqui 2015/05/28 00:59:54 Done.
128 last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME) 129 last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME)
129 last_missing_devices_path = os.path.join(out_dir,
130 device_list.LAST_MISSING_DEVICES_FILENAME)
131 try: 130 try:
132 last_devices = device_list.GetPersistentDeviceList(last_devices_path) 131 last_devices = device_list.ReadDeviceOfflineCountMap(last_devices_path)
133 except IOError: 132 except IOError:
134 # Ignore error, file might not exist 133 # Ignore error, file might not exist
135 last_devices = [] 134 last_devices = {}
136 135
137 try:
138 last_missing_devices = device_list.GetPersistentDeviceList(
139 last_missing_devices_path)
140 except IOError:
141 last_missing_devices = []
142 136
143 missing_devs = list(set(last_devices) - device_serials) 137 # Increment the count of missing devices and add new devices to the map.
navabi 2015/05/27 23:36:20 this comment is also confusing. Makes it sound lik
luqui 2015/05/28 00:59:54 Done.
144 new_missing_devs = list(set(missing_devs) - set(last_missing_devices)) 138 def freshen_device(k):
139 if k in device_serials:
140 return 0
141 else:
142 return last_devices[k] + 1
143 last_devices = {k: freshen_device(k) for k in last_devices}
145 144
146 if new_missing_devs and os.environ.get('BUILDBOT_SLAVENAME'): 145 missing_devices = {k: v for k, v in last_devices.iteritems() if v != 0}
147 logging.info('new_missing_devs %s' % new_missing_devs) 146 if missing_devices:
148 devices_missing_msg = '%d devices not detected.' % len(missing_devs) 147 logging.info('Missing devices: %s' % missing_devices)
149 bb_annotations.PrintSummaryText(devices_missing_msg)
150 148
151 from_address = 'chrome-bot@chromium.org' 149 # Warn about devices that are missing once, but take no further action.
152 to_addresses = ['chrome-labs-tech-ticket@google.com', 150 # This is because sometimes devices are still rebooting when we check.
153 'chrome-android-device-alert@google.com'] 151 once_missing = [k for k, v in last_devices.iteritems() if v == 1]
154 cc_addresses = ['chrome-android-device-alert@google.com'] 152 if once_missing:
155 subject = 'Devices offline on %s, %s, %s' % ( 153 bb_annotations.PrintSummaryText(
156 os.environ.get('BUILDBOT_SLAVENAME'), 154 '%d devices missing since last run' % len(once_missing))
157 os.environ.get('BUILDBOT_BUILDERNAME'),
158 os.environ.get('BUILDBOT_BUILDNUMBER'))
159 msg = ('Please reboot the following devices:\n%s' %
160 '\n'.join(map(str, new_missing_devs)))
161 SendEmail(from_address, to_addresses, cc_addresses, subject, msg)
162 155
163 all_known_devices = list(device_serials | set(last_devices)) 156 # Send an email for twice missing devices. This indicates a real problem.
164 device_list.WritePersistentDeviceList(last_devices_path, all_known_devices) 157 twice_missing = [k for k, v in last_devices.iteritems() if v == 2]
165 device_list.WritePersistentDeviceList(last_missing_devices_path, missing_devs) 158 if twice_missing:
159 bb_annotations.PrintSummaryText(
160 '%s devices missing for two runs -- notifying' % len(twice_missing))
161 if os.environ.get('BUILDBOT_SLAVENAME'):
162 from_address = 'chrome-bot@chromium.org'
163 to_addresses = ['chrome-labs-tech-ticket@google.com',
164 'chrome-android-device-alert@google.com']
165 cc_addresses = ['chrome-android-device-alert@google.com']
166 subject = 'Devices offline on %s, %s, %s' % (
167 os.environ.get('BUILDBOT_SLAVENAME'),
168 os.environ.get('BUILDBOT_BUILDERNAME'),
169 os.environ.get('BUILDBOT_BUILDNUMBER'))
170 msg = ('Please reboot the following devices:\n%s' %
171 '\n'.join(map(str, twice_missing)))
172 SendEmail(from_address, to_addresses, cc_addresses, subject, msg)
166 173
167 if not all_known_devices: 174 quite_missing = [k for k, v in last_devices.iteritems() if v > 2]
168 # This can happen if for some reason the .last_devices file is not 175 if quite_missing:
169 # present or if it was empty. 176 bb_annotations.PrintSummaryText(
170 return ['No online devices. Have any devices been plugged in?'] 177 '%s devices missing for more than two runs' % len(quite_missing))
171 if missing_devs: 178
172 devices_missing_msg = '%d devices not detected.' % len(missing_devs) 179 if not devices:
173 bb_annotations.PrintSummaryText(devices_missing_msg) 180 # This can happen if for some reason the .last_devices file is not
174 return ['Current online devices: %s' % ', '.join(d for d in device_serials), 181 # present or if it was empty.
175 '%s are no longer visible. Were they removed?' % missing_devs] 182 return ['No online devices. Have any devices been plugged in?']
183 if missing_devices:
184 return ['Current online devices: %s' % device_serials,
185 '%s are no longer visible. Were they removed?\n' %
186 missing_devices.keys()]
176 else: 187 else:
177 new_devs = device_serials - set(last_devices) 188 new_devices = [k for k in device_serials if k not in last_devices]
178 if new_devs and os.path.exists(last_devices_path): 189 if new_devices and os.path.exists(last_devices_path):
179 bb_annotations.PrintWarning() 190 bb_annotations.PrintWarning()
180 bb_annotations.PrintSummaryText( 191 bb_annotations.PrintSummaryText(
181 '%d new devices detected' % len(new_devs)) 192 '%d new devices detected' % len(new_devices))
182 logging.info('New devices detected:') 193 logging.info('%s new devices detected:' % len(new_devices))
183 for d in new_devs: 194 for d in new_devices:
184 logging.info(' %s', d) 195 logging.info(' %s', d)
196 # Reset last_devices since we have probably seen admin intervention, so
197 # we don't keep warning about the same old stuff.
198 last_devices = {k: 0 for k in device_serials}
199
200 device_list.WriteDeviceOfflineCountMap(last_devices_path, last_devices)
185 201
186 202
187 def SendEmail(from_address, to_addresses, cc_addresses, subject, msg): 203 def SendEmail(from_address, to_addresses, cc_addresses, subject, msg):
188 msg_body = '\r\n'.join(['From: %s' % from_address, 204 msg_body = '\r\n'.join(['From: %s' % from_address,
189 'To: %s' % ', '.join(to_addresses), 205 'To: %s' % ', '.join(to_addresses),
190 'CC: %s' % ', '.join(cc_addresses), 206 'CC: %s' % ', '.join(cc_addresses),
191 'Subject: %s' % subject, '', msg]) 207 'Subject: %s' % subject, '', msg])
192 try: 208 try:
193 server = smtplib.SMTP('localhost') 209 server = smtplib.SMTP('localhost')
194 server.sendmail(from_address, to_addresses, msg_body) 210 server.sendmail(from_address, to_addresses, msg_body)
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
275 options, args = parser.parse_args() 291 options, args = parser.parse_args()
276 if args: 292 if args:
277 parser.error('Unknown options %s' % args) 293 parser.error('Unknown options %s' % args)
278 294
279 run_tests_helper.SetLogLevel(options.verbose) 295 run_tests_helper.SetLogLevel(options.verbose)
280 296
281 # Remove the last build's "bad devices" before checking device statuses. 297 # Remove the last build's "bad devices" before checking device statuses.
282 device_blacklist.ResetBlacklist() 298 device_blacklist.ResetBlacklist()
283 299
284 try: 300 try:
285 expected_devices = device_list.GetPersistentDeviceList( 301 last_devices_path = os.path.join(
286 os.path.join(options.out_dir, device_list.LAST_DEVICES_FILENAME)) 302 options.out_dir, device_list.LAST_DEVICES_FILENAME)
303 expected_devices = device_list.ReadDeviceOfflineCountMap(
304 last_devices_path).keys()
287 except IOError: 305 except IOError:
288 expected_devices = [] 306 expected_devices = []
289 devices = device_utils.DeviceUtils.HealthyDevices() 307 devices = device_utils.DeviceUtils.HealthyDevices()
290 device_serials = [d.adb.GetDeviceSerial() for d in devices] 308 device_serials = [d.adb.GetDeviceSerial() for d in devices]
291 # Only restart usb if devices are missing. 309 # Only restart usb if devices are missing.
292 if set(expected_devices) != set(device_serials): 310 if set(expected_devices) != set(device_serials):
293 logging.warning('expected_devices: %s', expected_devices) 311 logging.warning('expected_devices: %s', expected_devices)
294 logging.warning('devices: %s', device_serials) 312 logging.warning('devices: %s', device_serials)
295 KillAllAdb() 313 KillAllAdb()
296 retries = 5 314 retries = 5
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
395 413
396 if num_failed_devs == len(devices): 414 if num_failed_devs == len(devices):
397 return 2 415 return 2
398 416
399 if not devices: 417 if not devices:
400 return 1 418 return 1
401 419
402 420
403 if __name__ == '__main__': 421 if __name__ == '__main__':
404 sys.exit(main()) 422 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | build/android/pylib/device/device_list.py » ('j') | build/android/pylib/perf/setup.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698