OLD | NEW |
1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. | 1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import grp, logging, os, pwd, re, stat, subprocess | 5 import grp, logging, os, pwd, re, stat, subprocess |
6 from signal import SIGSEGV | 6 from signal import SIGSEGV |
7 from autotest_lib.client.bin import site_log_reader, site_utils, test | 7 from autotest_lib.client.bin import site_crash_test, site_utils, test |
8 from autotest_lib.client.common_lib import error, utils | 8 from autotest_lib.client.common_lib import error, utils |
9 | 9 |
10 _CONSENT_FILE = '/home/chronos/Consent To Send Stats' | |
11 _CRASH_REPORTER_PATH = '/sbin/crash_reporter' | |
12 _CRASH_SENDER_PATH = '/sbin/crash_sender' | |
13 _CRASH_SENDER_CRON_PATH = '/etc/cron.hourly/crash_sender.hourly' | |
14 _CRASH_SENDER_RATE_DIR = '/var/lib/crash_sender' | |
15 _CRASH_SENDER_RUN_PATH = '/var/run/crash_sender.pid' | |
16 _CORE_PATTERN = '/proc/sys/kernel/core_pattern' | 10 _CORE_PATTERN = '/proc/sys/kernel/core_pattern' |
17 _DAILY_RATE_LIMIT = 8 | |
18 _LEAVE_CORE_PATH = '/etc/leave_core' | 11 _LEAVE_CORE_PATH = '/etc/leave_core' |
19 _MIN_UNIQUE_TIMES = 4 | |
20 _MOCK_CRASH_SENDING = '/tmp/mock-crash-sending' | |
21 _PAUSE_FILE = '/tmp/pause-crash-sending' | |
22 _SECONDS_SEND_SPREAD = 3600 | |
23 _SYSTEM_CRASH_DIR = '/var/spool/crash' | |
24 _USER_CRASH_DIR = '/home/chronos/user/crash' | |
25 | 12 |
26 | 13 |
27 class logging_UserCrash(test.test): | 14 class logging_UserCrash(site_crash_test.CrashTest): |
28 version = 1 | 15 version = 1 |
29 | 16 |
30 | 17 |
31 def setup(self): | 18 def setup(self): |
32 os.chdir(self.srcdir) | 19 os.chdir(self.srcdir) |
33 utils.system('make clean all') | 20 utils.system('make clean all') |
34 | 21 |
35 | 22 |
36 def _set_sending(self, is_enabled): | |
37 if is_enabled: | |
38 if os.path.exists(_PAUSE_FILE): | |
39 os.remove(_PAUSE_FILE) | |
40 else: | |
41 utils.system('touch ' + _PAUSE_FILE) | |
42 | |
43 | |
44 def _reset_rate_limiting(self): | |
45 utils.system('rm -rf ' + _CRASH_SENDER_RATE_DIR) | |
46 | |
47 | |
48 def _clear_spooled_crashes(self): | |
49 utils.system('rm -rf ' + _SYSTEM_CRASH_DIR) | |
50 utils.system('rm -rf ' + _USER_CRASH_DIR) | |
51 | |
52 | |
53 def _kill_running_sender(self): | |
54 if not os.path.exists(_CRASH_SENDER_RUN_PATH): | |
55 return | |
56 running_pid = int(utils.read_file(_CRASH_SENDER_RUN_PATH)) | |
57 logging.warning('Detected running crash sender (%d), killing' % | |
58 running_pid) | |
59 utils.system('kill -9 %d' % running_pid) | |
60 os.remove(_CRASH_SENDER_RUN_PATH) | |
61 | |
62 | |
63 def _set_sending_mock(self, mock_enabled, send_success=True): | |
64 if mock_enabled: | |
65 if send_success: | |
66 data = '' | |
67 else: | |
68 data = '1' | |
69 logging.info('Setting sending mock') | |
70 utils.open_write_close(_MOCK_CRASH_SENDING, data) | |
71 else: | |
72 utils.system('rm -f ' + _MOCK_CRASH_SENDING) | |
73 | |
74 | |
75 def _set_consent(self, has_consent): | |
76 if has_consent: | |
77 utils.open_write_close(_CONSENT_FILE, 'test-consent') | |
78 logging.info('Created ' + _CONSENT_FILE) | |
79 else: | |
80 utils.system('rm -f "%s"' % (_CONSENT_FILE)) | |
81 | |
82 | |
83 def _get_pushed_consent_file_path(self): | |
84 return os.path.join(self.bindir, 'pushed_consent') | |
85 | |
86 | |
87 def _push_consent(self): | |
88 if os.path.exists(_CONSENT_FILE): | |
89 os.rename(_CONSENT_FILE, self._get_pushed_consent_file_path()) | |
90 | |
91 | |
92 def _pop_consent(self): | |
93 self._set_consent(False) | |
94 if os.path.exists(self._get_pushed_consent_file_path()): | |
95 os.rename(self._get_pushed_consent_file_path(), _CONSENT_FILE) | |
96 | |
97 | |
98 def _get_crash_dir(self, username): | |
99 if username == 'chronos': | |
100 return _USER_CRASH_DIR | |
101 else: | |
102 return _SYSTEM_CRASH_DIR | |
103 | |
104 | |
105 def _initialize_crash_reporter(self): | |
106 utils.system('%s --init --nounclean_check' % _CRASH_REPORTER_PATH) | |
107 | |
108 | |
109 def initialize(self): | |
110 test.test.initialize(self) | |
111 self._log_reader = site_log_reader.LogReader() | |
112 | |
113 | |
114 def cleanup(self): | |
115 self._reset_rate_limiting() | |
116 self._clear_spooled_crashes() | |
117 test.test.cleanup(self) | |
118 | |
119 | |
120 def _create_fake_crash_dir_entry(self, name): | |
121 entry = os.path.join(_SYSTEM_CRASH_DIR, name) | |
122 if not os.path.exists(_SYSTEM_CRASH_DIR): | |
123 os.makedirs(_SYSTEM_CRASH_DIR) | |
124 utils.system('touch ' + entry) | |
125 return entry | |
126 | |
127 | |
128 def _prepare_sender_one_crash(self, | |
129 send_success, | |
130 reports_enabled, | |
131 username, | |
132 minidump): | |
133 self._set_sending_mock(mock_enabled=True, send_success=send_success) | |
134 self._set_consent(reports_enabled) | |
135 if minidump is None: | |
136 minidump = self._create_fake_crash_dir_entry('fake.dmp') | |
137 return minidump | |
138 | |
139 | |
140 def _parse_sender_output(self, output): | |
141 """Parse the log output from the crash_sender script. | |
142 | |
143 This script can run on the logs from either a mocked or true | |
144 crash send. | |
145 | |
146 Args: | |
147 output: output from the script | |
148 | |
149 Returns: | |
150 A dictionary with these values: | |
151 send_attempt: did the script attempt to send a crash. | |
152 send_success: if it attempted, was the crash send successful. | |
153 sleep_time: if it attempted, how long did it sleep before | |
154 sending (if mocked, how long would it have slept) | |
155 output: the output from the script, copied | |
156 """ | |
157 sleep_match = re.search('Scheduled to send in (\d+)s', output) | |
158 send_attempt = sleep_match is not None | |
159 if send_attempt: | |
160 sleep_time = int(sleep_match.group(1)) | |
161 else: | |
162 sleep_time = None | |
163 send_success = 'Mocking successful send' in output | |
164 return {'send_attempt': send_attempt, | |
165 'send_success': send_success, | |
166 'sleep_time': sleep_time, | |
167 'output': output} | |
168 | |
169 | |
170 def _call_sender_one_crash(self, | |
171 send_success=True, | |
172 reports_enabled=True, | |
173 username='root', | |
174 minidump=None): | |
175 """Call the crash sender script to mock upload one crash. | |
176 | |
177 Args: | |
178 send_success: Mock a successful send if true | |
179 reports_enabled: Has the user consented to sending crash reports. | |
180 username: user to emulate a crash from | |
181 minidump: minidump to use for crash, if None we create one. | |
182 | |
183 Returns: | |
184 Returns a dictionary describing the result with the keys | |
185 from _parse_sender_output, as well as: | |
186 minidump_exists: does the minidump still exist after calling | |
187 send script | |
188 rate_count: how many crashes have been uploaded in the past | |
189 24 hours. | |
190 """ | |
191 minidump = self._prepare_sender_one_crash(send_success, | |
192 reports_enabled, | |
193 username, | |
194 minidump) | |
195 self._log_reader.set_start_by_current() | |
196 script_output = utils.system_output( | |
197 '/bin/sh -c "%s" 2>&1' % _CRASH_SENDER_PATH, | |
198 ignore_status=True) | |
199 # Wait for up to 2s for no crash_sender to be running, | |
200 # otherwise me might get only part of the output. | |
201 site_utils.poll_for_condition( | |
202 lambda: utils.system('pgrep crash_sender', | |
203 ignore_status=True) != 0, | |
204 timeout=2, | |
205 exception=error.TestError( | |
206 'Timeout waiting for crash_sender to finish: ' + | |
207 self._log_reader.get_logs())) | |
208 | |
209 output = self._log_reader.get_logs() | |
210 logging.debug('Crash sender message output:\n' + output) | |
211 if script_output != '': | |
212 raise error.TestFail( | |
213 'Unexpected crash_sender stdout/stderr: ' + script_output) | |
214 | |
215 if os.path.exists(minidump): | |
216 minidump_exists = True | |
217 os.remove(minidump) | |
218 else: | |
219 minidump_exists = False | |
220 if os.path.exists(_CRASH_SENDER_RATE_DIR): | |
221 rate_count = len(os.listdir(_CRASH_SENDER_RATE_DIR)) | |
222 else: | |
223 rate_count = 0 | |
224 | |
225 result = self._parse_sender_output(output) | |
226 result['minidump_exists'] = minidump_exists | |
227 result['rate_count'] = rate_count | |
228 | |
229 # Show the result for debugging but remove 'output' key | |
230 # since it's large and earlier in debug output. | |
231 debug_result = dict(result) | |
232 del debug_result['output'] | |
233 logging.debug('Result of send (besides output): %s' % debug_result) | |
234 | |
235 return result | |
236 | |
237 | |
238 def _test_reporter_startup(self): | 23 def _test_reporter_startup(self): |
239 """Test that the core_pattern is set up by crash reporter.""" | 24 """Test that the core_pattern is set up by crash reporter.""" |
240 output = utils.read_file(_CORE_PATTERN).rstrip() | 25 output = utils.read_file(_CORE_PATTERN).rstrip() |
241 expected_core_pattern = ('|%s --signal=%%s --pid=%%p' % | 26 expected_core_pattern = ('|%s --signal=%%s --pid=%%p' % |
242 _CRASH_REPORTER_PATH) | 27 self._CRASH_REPORTER_PATH) |
243 if output != expected_core_pattern: | 28 if output != expected_core_pattern: |
244 raise error.TestFail('core pattern should have been %s, not %s' % | 29 raise error.TestFail('core pattern should have been %s, not %s' % |
245 (expected_core_pattern, output)) | 30 (expected_core_pattern, output)) |
246 | 31 |
247 self._log_reader.set_start_by_reboot(-1) | 32 self._log_reader.set_start_by_reboot(-1) |
248 | 33 |
249 if not self._log_reader.can_find('Enabling crash handling'): | 34 if not self._log_reader.can_find('Enabling crash handling'): |
250 raise error.TestFail( | 35 raise error.TestFail( |
251 'user space crash handling was not started during last boot') | 36 'user space crash handling was not started during last boot') |
252 | 37 |
253 | 38 |
254 def _test_reporter_shutdown(self): | 39 def _test_reporter_shutdown(self): |
255 """Test the crash_reporter shutdown code works.""" | 40 """Test the crash_reporter shutdown code works.""" |
256 self._log_reader.set_start_by_current() | 41 self._log_reader.set_start_by_current() |
257 utils.system('%s --clean_shutdown' % _CRASH_REPORTER_PATH) | 42 utils.system('%s --clean_shutdown' % self._CRASH_REPORTER_PATH) |
258 output = utils.read_file(_CORE_PATTERN).rstrip() | 43 output = utils.read_file(_CORE_PATTERN).rstrip() |
259 if output != 'core': | 44 if output != 'core': |
260 raise error.TestFail('core pattern should have been core, not %s' % | 45 raise error.TestFail('core pattern should have been core, not %s' % |
261 output) | 46 output) |
262 | 47 |
263 | 48 |
264 def _test_sender_simple(self): | |
265 """Test sending a single crash.""" | |
266 self._set_sending(True) | |
267 result = self._call_sender_one_crash() | |
268 if (result['minidump_exists'] or | |
269 result['rate_count'] != 1 or | |
270 not result['send_attempt'] or | |
271 not result['send_success'] or | |
272 result['sleep_time'] < 0 or | |
273 result['sleep_time'] >= _SECONDS_SEND_SPREAD): | |
274 raise error.TestFail('Simple send failed') | |
275 | |
276 | |
277 def _test_sender_pausing(self): | |
278 """Test the sender returns immediately when the pause file is present. | |
279 | |
280 This is testing the sender's test functionality - if this regresses, | |
281 other tests can become flaky because the cron-started sender may run | |
282 asynchronously to these tests.""" | |
283 self._set_sending(False) | |
284 result = self._call_sender_one_crash() | |
285 if (not result['minidump_exists'] or | |
286 not 'Exiting early due to' in result['output'] or | |
287 result['send_attempt']): | |
288 raise error.TestFail('Sender did not pause') | |
289 | |
290 | |
291 def _test_sender_reports_disabled(self): | |
292 """Test that when reporting is disabled, we don't send.""" | |
293 self._set_sending(True) | |
294 result = self._call_sender_one_crash(reports_enabled=False) | |
295 if (result['minidump_exists'] or | |
296 not 'Uploading is disabled' in result['output'] or | |
297 result['send_attempt']): | |
298 raise error.TestFail('Sender did not handle reports disabled') | |
299 | |
300 | |
301 def _test_sender_rate_limiting(self): | |
302 """Test the sender properly rate limits and sends with delay.""" | |
303 self._set_sending(True) | |
304 sleep_times = [] | |
305 for i in range(1, _DAILY_RATE_LIMIT + 1): | |
306 result = self._call_sender_one_crash() | |
307 if not result['send_attempt'] or not result['send_success']: | |
308 raise error.TestFail('Crash uploader did not send on #%d' % i) | |
309 if result['rate_count'] != i: | |
310 raise error.TestFail('Did not properly persist rate on #%d' % i) | |
311 sleep_times.append(result['sleep_time']) | |
312 logging.debug('Sleeps between sending crashes were: %s' % sleep_times) | |
313 unique_times = {} | |
314 for i in range(0, _DAILY_RATE_LIMIT): | |
315 unique_times[sleep_times[i]] = True | |
316 if len(unique_times) < _MIN_UNIQUE_TIMES: | |
317 raise error.TestFail('Expected at least %d unique times: %s' % | |
318 _MIN_UNIQUE_TIMES, sleep_times) | |
319 # Now the _DAILY_RATE_LIMIT ^ th send request should fail. | |
320 result = self._call_sender_one_crash() | |
321 if (not result['minidump_exists'] or | |
322 not 'Cannot send more crashes' in result['output'] or | |
323 result['rate_count'] != _DAILY_RATE_LIMIT): | |
324 raise error.TestFail('Crash rate limiting did not take effect') | |
325 | |
326 # Set one rate file a day earlier and verify can send | |
327 rate_files = os.listdir(_CRASH_SENDER_RATE_DIR) | |
328 rate_path = os.path.join(_CRASH_SENDER_RATE_DIR, rate_files[0]) | |
329 statinfo = os.stat(rate_path) | |
330 os.utime(rate_path, (statinfo.st_atime, | |
331 statinfo.st_mtime - (60 * 60 * 25))) | |
332 utils.system('ls -l ' + _CRASH_SENDER_RATE_DIR) | |
333 result = self._call_sender_one_crash() | |
334 if (not result['send_attempt'] or | |
335 not result['send_success'] or | |
336 result['rate_count'] != _DAILY_RATE_LIMIT): | |
337 raise error.TestFail('Crash not sent even after 25hrs pass') | |
338 | |
339 | |
340 def _test_sender_single_instance(self): | |
341 """Test the sender fails to start when another instance is running. | |
342 | |
343 Here we rely on the sender not checking the other running pid | |
344 is of the same instance. | |
345 """ | |
346 self._set_sending(True) | |
347 utils.open_write_close(_CRASH_SENDER_RUN_PATH, str(os.getpid())) | |
348 result = self._call_sender_one_crash() | |
349 if (not 'Already running.' in result['output'] or | |
350 result['send_attempt'] or not result['minidump_exists']): | |
351 raise error.TestFail('Allowed multiple instances to run') | |
352 os.remove(_CRASH_SENDER_RUN_PATH) | |
353 | |
354 | |
355 def _test_sender_send_fails(self): | |
356 """Test that when the send fails we try again later.""" | |
357 self._set_sending(True) | |
358 result = self._call_sender_one_crash(send_success=False) | |
359 if not result['send_attempt'] or result['send_success']: | |
360 raise error.TestError('Did not properly cause a send failure') | |
361 if result['rate_count'] != 1: | |
362 raise error.TestFail('Did not count a failed send against rate ' | |
363 'limiting') | |
364 if not result['minidump_exists']: | |
365 raise error.TestFail('Expected minidump to be saved for later ' | |
366 'sending') | |
367 | |
368 | |
369 def _test_sender_leaves_core_files(self): | |
370 """Test that a core file is left in the send directory. | |
371 | |
372 Core files will only persist for developer/testing images. We | |
373 should never remove such a file.""" | |
374 self._set_sending(True) | |
375 # Call prepare function to make sure the directory exists. | |
376 core_name = 'something.ending.with.core' | |
377 core_path = self._create_fake_crash_dir_entry(core_name) | |
378 result = self._call_sender_one_crash() | |
379 if not 'Ignoring core file.' in result['output']: | |
380 raise error.TestFail('Expected ignoring core file message') | |
381 if not os.path.exists(core_path): | |
382 raise error.TestFail('Core file was removed') | |
383 | |
384 | |
385 def _test_cron_runs(self): | |
386 """Test sender runs successfully as part of the hourly cron job. | |
387 | |
388 Assuming we've run test_sender_simple which shows that a minidump | |
389 gets removed as part of sending, we run the cron job (which is | |
390 asynchronous) and wait for that file to be removed to just verify | |
391 the job eventually runs the sender.""" | |
392 self._set_sending(True) | |
393 minidump = self._prepare_sender_one_crash(send_success=True, | |
394 reports_enabled=True, | |
395 username='root', | |
396 minidump=None) | |
397 if not os.path.exists(minidump): | |
398 raise error.TestError('minidump not created') | |
399 utils.system(_CRASH_SENDER_CRON_PATH) | |
400 self._log_reader.set_start_by_current() | |
401 site_utils.poll_for_condition( | |
402 lambda: not os.path.exists(minidump), | |
403 desc='minidump to be removed') | |
404 crash_sender_log = self._log_reader.get_logs() | |
405 logging.debug('Contents of crash sender log: ' + crash_sender_log) | |
406 result = self._parse_sender_output(crash_sender_log) | |
407 if not result['send_attempt'] or not result['send_success']: | |
408 raise error.TestFail('Cron simple run test failed') | |
409 | |
410 | |
411 def _prepare_crasher(self): | 49 def _prepare_crasher(self): |
412 """Extract the crasher and set its permissions. | 50 """Extract the crasher and set its permissions. |
413 | 51 |
414 crasher is only gzipped to subvert Portage stripping. | 52 crasher is only gzipped to subvert Portage stripping. |
415 """ | 53 """ |
416 self._crasher_path = { | 54 self._crasher_path = { |
417 True: os.path.join(self.srcdir, 'crasher_breakpad'), | 55 True: os.path.join(self.srcdir, 'crasher_breakpad'), |
418 False: os.path.join(self.srcdir, 'crasher_nobreakpad') | 56 False: os.path.join(self.srcdir, 'crasher_nobreakpad') |
419 } | 57 } |
420 utils.system('cd %s; tar xzf crasher.tgz' % | 58 utils.system('cd %s; tar xzf crasher.tgz' % |
(...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
755 os.system('mount -norw,remount /') | 393 os.system('mount -norw,remount /') |
756 if os.path.exists(_LEAVE_CORE_PATH): | 394 if os.path.exists(_LEAVE_CORE_PATH): |
757 os.remove(_LEAVE_CORE_PATH) | 395 os.remove(_LEAVE_CORE_PATH) |
758 self._check_core_file_persisting(False) | 396 self._check_core_file_persisting(False) |
759 | 397 |
760 | 398 |
761 # TODO(kmixter): Test crashing a process as ntp or some other | 399 # TODO(kmixter): Test crashing a process as ntp or some other |
762 # non-root, non-chronos user. | 400 # non-root, non-chronos user. |
763 | 401 |
764 def run_once(self): | 402 def run_once(self): |
765 test_names = [ | 403 self.run_crash_tests([ |
766 'reporter_startup', | 404 'reporter_startup', |
767 'reporter_shutdown', | 405 'reporter_shutdown', |
768 'sender_simple', | |
769 'sender_pausing', | |
770 'sender_reports_disabled', | |
771 'sender_rate_limiting', | |
772 'sender_single_instance', | |
773 'sender_send_fails', | |
774 'sender_leaves_core_files', | |
775 'cron_runs', | |
776 'no_crash', | 406 'no_crash', |
777 'chronos_breakpad_crasher', | 407 'chronos_breakpad_crasher', |
778 'chronos_nobreakpad_crasher', | 408 'chronos_nobreakpad_crasher', |
779 'root_breakpad_crasher', | 409 'root_breakpad_crasher', |
780 'root_nobreakpad_crasher', | 410 'root_nobreakpad_crasher', |
781 'core_file_removed_in_production', | 411 'core_file_removed_in_production', |
782 'core_file_persists_in_debug', | 412 'core_file_persists_in_debug', |
783 ] | 413 ]) |
784 | |
785 self._push_consent() | |
786 | |
787 # Sanity check test_names is complete | |
788 for attr in dir(self): | |
789 if attr.find('_test_') == 0: | |
790 test_name = attr[6:] | |
791 if not test_name in test_names: | |
792 raise error.TestError('Test %s is missing' % test_name) | |
793 | |
794 for test_name in test_names: | |
795 logging.info(('=' * 20) + ('Running %s' % test_name) + ('=' * 20)) | |
796 self._initialize_crash_reporter() | |
797 self._kill_running_sender() | |
798 self._reset_rate_limiting() | |
799 self._clear_spooled_crashes() | |
800 self._set_sending(False) | |
801 getattr(self, '_test_' + test_name)() | |
802 | |
803 | |
804 def cleanup(self): | |
805 self._set_sending(True) | |
806 self._set_sending_mock(mock_enabled=False) | |
807 self._pop_consent() | |
OLD | NEW |