Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: build/android/buildbot/bb_device_status_check.py

Issue 1148873007: Fix last_devices to be quieter, and improve device affinity. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # 2 #
3 # Copyright 2013 The Chromium Authors. All rights reserved. 3 # Copyright 2013 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 5 # found in the LICENSE file.
6 6
7 """A class to keep track of devices across builds and report state.""" 7 """A class to keep track of devices across builds and report state."""
8 import json 8 import json
9 import logging 9 import logging
10 import optparse 10 import optparse
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 if (device_product_name == 'mantaray' and 106 if (device_product_name == 'mantaray' and
107 battery_info.get('AC powered', None) != 'true'): 107 battery_info.get('AC powered', None) != 'true'):
108 errors += ['Mantaray device not connected to AC power.'] 108 errors += ['Mantaray device not connected to AC power.']
109 109
110 full_report = '\n'.join(report) 110 full_report = '\n'.join(report)
111 111
112 return (device_type, device_build, battery_level, full_report, errors, 112 return (device_type, device_build, battery_level, full_report, errors,
113 dev_good, json_data) 113 dev_good, json_data)
114 114
115 115
116 def CheckForMissingDevices(options, adb_online_devs): 116 def CheckForMissingDevices(options, adb_online_devices):
jbudorick 2015/05/23 01:06:49 rebase, I changed this function a few days ago.
luqui 2015/05/27 20:01:11 Done.
117 """Uses file of previous online devices to detect broken phones. 117 """Uses file of previous online devices to detect broken phones.
118 118
119 Args: 119 Args:
120 options: out_dir parameter of options argument is used as the base 120 options: out_dir parameter of options argument is used as the base
121 directory to load and update the cache file. 121 directory to load and update the cache file.
122 adb_online_devs: A list of serial numbers of the currently visible 122 adb_online_devices: A list of serial numbers of the currently visible
123 and online attached devices. 123 and online attached devices.
124 """ 124 """
125 # TODO(navabi): remove this once the bug that causes different number 125 # TODO(navabi): remove this once the bug that causes different number
126 # of devices to be detected between calls is fixed. 126 # of devices to be detected between calls is fixed.
127 logger = logging.getLogger() 127 logger = logging.getLogger()
128 logger.setLevel(logging.INFO) 128 logger.setLevel(logging.INFO)
129 129
130 out_dir = os.path.abspath(options.out_dir) 130 out_dir = os.path.abspath(options.out_dir)
131 131
132 # last_devices denotes all known devices prior to this run 132 # last_devices denotes all known devices since the last time a new device was
133 # detected
133 last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME) 134 last_devices_path = os.path.join(out_dir, device_list.LAST_DEVICES_FILENAME)
134 last_missing_devices_path = os.path.join(out_dir,
135 device_list.LAST_MISSING_DEVICES_FILENAME)
136 try: 135 try:
137 last_devices = device_list.GetPersistentDeviceList(last_devices_path) 136 last_devices = device_list.GetOfflineDeviceMap(last_devices_path)
138 except IOError: 137 except IOError:
139 # Ignore error, file might not exist 138 # Ignore error, file might not exist
140 last_devices = [] 139 last_devices = {}
141 140
142 try: 141 # Increment the count of any missing devices.
jbudorick 2015/05/23 01:06:49 last_devices = dict( (k, 0 if k in adb_onlin...
luqui 2015/05/27 20:01:12 Yeah that was a bit clumsy. Went with a more verb...
143 last_missing_devices = device_list.GetPersistentDeviceList( 142 for k in last_devices.keys():
144 last_missing_devices_path) 143 if k not in adb_online_devices:
145 except IOError: 144 last_devices[k] += 1
146 last_missing_devices = []
147 145
148 missing_devs = list(set(last_devices) - set(adb_online_devs)) 146 # Reset the count of any present devices, and find new devices.
149 new_missing_devs = list(set(missing_devs) - set(last_missing_devices)) 147 for k in adb_online_devices:
148 if k in last_devices:
149 last_devices[k] = 0
150 150
151 if new_missing_devs and os.environ.get('BUILDBOT_SLAVENAME'): 151 missing_devices = { k: v for k, v in last_devices.iteeritems() if v != 0 }
jbudorick 2015/05/23 01:06:48 - iteeritems -> iteritems - if not v - nit: no spa...
luqui 2015/05/27 20:01:12 Done, except for "not v" -- mixing up numberness a...
152 logging.info('new_missing_devs %s' % new_missing_devs) 152 if missing_devices:
153 devices_missing_msg = '%d devices not detected.' % len(missing_devs) 153 logging.info('Missing devices: %s' % missing_devices)
154 bb_annotations.PrintSummaryText(devices_missing_msg)
155 154
156 from_address = 'chrome-bot@chromium.org' 155 # Warn about devices that are missing once, but take no further action.
157 to_addresses = ['chrome-labs-tech-ticket@google.com', 156 # This is because sometimes devices are still rebooting when we check.
158 'chrome-android-device-alert@google.com'] 157 once_missing = [ k for k, v in last_devices.iteritems() if v == 1 ]
159 cc_addresses = ['chrome-android-device-alert@google.com'] 158 if once_missing:
160 subject = 'Devices offline on %s, %s, %s' % ( 159 bb_annotations.PrintSummaryText(
161 os.environ.get('BUILDBOT_SLAVENAME'), 160 '%d devices missing since last run' % len(once_missing))
162 os.environ.get('BUILDBOT_BUILDERNAME'),
163 os.environ.get('BUILDBOT_BUILDNUMBER'))
164 msg = ('Please reboot the following devices:\n%s' %
165 '\n'.join(map(str, new_missing_devs)))
166 SendEmail(from_address, to_addresses, cc_addresses, subject, msg)
167 161
168 all_known_devices = list(set(adb_online_devs) | set(last_devices)) 162 # Send an email for twice missing devices. This indicates a real problem.
169 device_list.WritePersistentDeviceList(last_devices_path, all_known_devices) 163 twice_missing = [ k for k, v in last_devices.iteritems() if v == 2 ]
170 device_list.WritePersistentDeviceList(last_missing_devices_path, missing_devs) 164 if twice_missing:
165 bb_annotations.PrintSummaryText(
166 '%s devices missing for two runs -- notifying' % len(twice_missing))
167 if os.environ.get('BUILDBOT_SLAVENAME'):
168 from_address = 'chrome-bot@chromium.org'
169 to_addresses = ['chrome-labs-tech-ticket@google.com',
170 'chrome-android-device-alert@google.com']
171 cc_addresses = ['chrome-android-device-alert@google.com']
172 subject = 'Devices offline on %s, %s, %s' % (
173 os.environ.get('BUILDBOT_SLAVENAME'),
174 os.environ.get('BUILDBOT_BUILDERNAME'),
175 os.environ.get('BUILDBOT_BUILDNUMBER'))
176 msg = ('Please reboot the following devices:\n%s' %
177 '\n'.join(map(str, twice_missing)))
178 SendEmail(from_address, to_addresses, cc_addresses, subject, msg)
171 179
172 if not all_known_devices: 180 quite_missing = [ k for k, v in last_devices.iteritems() if v == 3 ]
jbudorick 2015/05/23 01:06:49 if v > 2 ?
luqui 2015/05/27 20:01:11 Done.
173 # This can happen if for some reason the .last_devices file is not 181 if quite_missing:
174 # present or if it was empty. 182 bb_annotations.PrintSummaryTest(
175 return ['No online devices. Have any devices been plugged in?'] 183 '%s devices missing for more than two runs' % len(quite_missing))
176 if missing_devs:
177 devices_missing_msg = '%d devices not detected.' % len(missing_devs)
178 bb_annotations.PrintSummaryText(devices_missing_msg)
179 184
185 if not adb_online_devices:
186 # This can happen if for some reason the .last_devices file is not
187 # present or if it was empty.
188 return ['No online devices. Have any devices been plugged in?']
189 if missing_devices:
180 # TODO(navabi): Debug by printing both output from GetCmdOutput and 190 # TODO(navabi): Debug by printing both output from GetCmdOutput and
181 # GetAttachedDevices to compare results. 191 # GetAttachedDevices to compare results.
182 crbug_link = ('https://code.google.com/p/chromium/issues/entry?summary=' 192 crbug_link = (
183 '%s&comment=%s&labels=Restrict-View-Google,OS-Android,Infra' % 193 'https://code.google.com/p/chromium/issues/entry?summary='
184 (urllib.quote('Device Offline'), 194 '%s&comment=%s&labels=Restrict-View-Google,OS-Android,Infra' %
185 urllib.quote('Buildbot: %s %s\n' 195 (urllib.quote('Device Offline'),
186 'Build: %s\n' 196 urllib.quote('Buildbot: %s %s\n'
187 '(please don\'t change any labels)' % 197 'Build: %s\n'
188 (os.environ.get('BUILDBOT_BUILDERNAME'), 198 '(please don\'t change any labels)' %
189 os.environ.get('BUILDBOT_SLAVENAME'), 199 (os.environ.get('BUILDBOT_BUILDERNAME'),
200 os.environ.get('BUILDBOT_SLAVENAME'),
190 os.environ.get('BUILDBOT_BUILDNUMBER'))))) 201 os.environ.get('BUILDBOT_BUILDNUMBER')))))
191 return ['Current online devices: %s' % adb_online_devs, 202 return ['Current online devices: %s' % adb_online_devices,
192 '%s are no longer visible. Were they removed?\n' % missing_devs, 203 '%s are no longer visible. Were they removed?\n' %
204 missing_devices.keys(),
193 'SHERIFF:\n', 205 'SHERIFF:\n',
194 '@@@STEP_LINK@Click here to file a bug@%s@@@\n' % crbug_link, 206 '@@@STEP_LINK@Click here to file a bug@%s@@@\n' % crbug_link,
195 'Cache file: %s\n\n' % last_devices_path, 207 'Cache file: %s\n\n' % last_devices_path,
196 'adb devices: %s' % GetCmdOutput(['adb', 'devices']), 208 'adb devices: %s' % GetCmdOutput(['adb', 'devices']),
197 'adb devices(GetAttachedDevices): %s' % adb_online_devs] 209 'adb devices(GetAttachedDevices): %s' % adb_online_devices]
198 else: 210 else:
199 new_devs = set(adb_online_devs) - set(last_devices) 211 new_devices = [ k for k in adb_online_devices if k not in last_devices ]
200 if new_devs and os.path.exists(last_devices_path): 212 if new_devices and os.path.exists(last_devices_path):
201 bb_annotations.PrintWarning() 213 bb_annotations.PrintWarning()
202 bb_annotations.PrintSummaryText( 214 bb_annotations.PrintSummaryText(
203 '%d new devices detected' % len(new_devs)) 215 '%d new devices detected' % len(new_devices))
204 print ('New devices detected %s. And now back to your ' 216 print ('New devices detected %s. And now back to your '
205 'regularly scheduled program.' % list(new_devs)) 217 'regularly scheduled program.' % list(new_devices))
218 # Reset last_devices since we have probably seen admin intervention, so
219 # we don't keep warning about the same old stuff.
220 last_devices = { k: 0 for k in adb_online_devices }
221
222 device_list.WriteOfflineDeviceMap(last_devices_path, last_devices)
206 223
207 224
208 def SendEmail(from_address, to_addresses, cc_addresses, subject, msg): 225 def SendEmail(from_address, to_addresses, cc_addresses, subject, msg):
209 msg_body = '\r\n'.join(['From: %s' % from_address, 226 msg_body = '\r\n'.join(['From: %s' % from_address,
210 'To: %s' % ', '.join(to_addresses), 227 'To: %s' % ', '.join(to_addresses),
211 'CC: %s' % ', '.join(cc_addresses), 228 'CC: %s' % ', '.join(cc_addresses),
212 'Subject: %s' % subject, '', msg]) 229 'Subject: %s' % subject, '', msg])
213 try: 230 try:
214 server = smtplib.SMTP('localhost') 231 server = smtplib.SMTP('localhost')
215 server.sendmail(from_address, to_addresses, msg_body) 232 server.sendmail(from_address, to_addresses, msg_body)
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
290 help='Output JSON information into a specified file.') 307 help='Output JSON information into a specified file.')
291 308
292 options, args = parser.parse_args() 309 options, args = parser.parse_args()
293 if args: 310 if args:
294 parser.error('Unknown options %s' % args) 311 parser.error('Unknown options %s' % args)
295 312
296 # Remove the last build's "bad devices" before checking device statuses. 313 # Remove the last build's "bad devices" before checking device statuses.
297 device_blacklist.ResetBlacklist() 314 device_blacklist.ResetBlacklist()
298 315
299 try: 316 try:
300 expected_devices = device_list.GetPersistentDeviceList( 317 last_devices_path = os.path.join(
301 os.path.join(options.out_dir, device_list.LAST_DEVICES_FILENAME)) 318 options.out_dir, device_list.LAST_DEVICES_FILENAME)
319 expected_devices = device_list.GetOfflineDeviceMap(
320 last_devices_path).keys()
302 except IOError: 321 except IOError:
303 expected_devices = [] 322 expected_devices = []
304 devices = android_commands.GetAttachedDevices() 323 devices = android_commands.GetAttachedDevices()
305 # Only restart usb if devices are missing. 324 # Only restart usb if devices are missing.
306 if set(expected_devices) != set(devices): 325 if set(expected_devices) != set(devices):
307 print 'expected_devices: %s, devices: %s' % (expected_devices, devices) 326 print 'expected_devices: %s, devices: %s' % (expected_devices, devices)
308 KillAllAdb() 327 KillAllAdb()
309 retries = 5 328 retries = 5
310 usb_restarted = True 329 usb_restarted = True
311 if options.restart_usb: 330 if options.restart_usb:
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
384 403
385 if num_failed_devs == len(devices): 404 if num_failed_devs == len(devices):
386 return 2 405 return 2
387 406
388 if not devices: 407 if not devices:
389 return 1 408 return 1
390 409
391 410
392 if __name__ == '__main__': 411 if __name__ == '__main__':
393 sys.exit(main()) 412 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | build/android/pylib/device/device_list.py » ('j') | build/android/pylib/device/device_list.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698