client/site_tests/logging_UserCrash/logging_UserCrash.py - Issue 3138009: Split logging_UserCrash out into two other files

Side by Side Diff: client/site_tests/logging_UserCrash/logging_UserCrash.py

Issue 3138009: Split logging_UserCrash out into two other files (Closed) Base URL: ssh://git@chromiumos-git//autotest.git

Patch Set: Clean up imports Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « client/site_tests/logging_UncleanShutdown/logging_UncleanShutdown.py ('k') | server/site_tests/logging_UncleanShutdownServer/control » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved.	1 # Copyright (c) 2010 The Chromium OS Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import grp, logging, os, pwd, re, stat, subprocess	5 import grp, logging, os, pwd, re, stat, subprocess

6 from signal import SIGSEGV	6 from signal import SIGSEGV

7 from autotest_lib.client.bin import site_log_reader, site_utils, test	7 from autotest_lib.client.bin import site_crash_test, site_utils, test

8 from autotest_lib.client.common_lib import error, utils	8 from autotest_lib.client.common_lib import error, utils

9	9

10 _CONSENT_FILE = '/home/chronos/Consent To Send Stats'

11 _CRASH_REPORTER_PATH = '/sbin/crash_reporter'

12 _CRASH_SENDER_PATH = '/sbin/crash_sender'

13 _CRASH_SENDER_CRON_PATH = '/etc/cron.hourly/crash_sender.hourly'

14 _CRASH_SENDER_RATE_DIR = '/var/lib/crash_sender'

15 _CRASH_SENDER_RUN_PATH = '/var/run/crash_sender.pid'

16 _CORE_PATTERN = '/proc/sys/kernel/core_pattern'	10 _CORE_PATTERN = '/proc/sys/kernel/core_pattern'

17 _DAILY_RATE_LIMIT = 8

18 _LEAVE_CORE_PATH = '/etc/leave_core'	11 _LEAVE_CORE_PATH = '/etc/leave_core'

19 _MIN_UNIQUE_TIMES = 4

20 _MOCK_CRASH_SENDING = '/tmp/mock-crash-sending'

21 _PAUSE_FILE = '/tmp/pause-crash-sending'

22 _SECONDS_SEND_SPREAD = 3600

23 _SYSTEM_CRASH_DIR = '/var/spool/crash'

24 _USER_CRASH_DIR = '/home/chronos/user/crash'

25	12

26	13

27 class logging_UserCrash(test.test):	14 class logging_UserCrash(site_crash_test.CrashTest):

28 version = 1	15 version = 1

29	16

30	17

31 def setup(self):	18 def setup(self):

32 os.chdir(self.srcdir)	19 os.chdir(self.srcdir)

33 utils.system('make clean all')	20 utils.system('make clean all')

34	21

35	22

36 def _set_sending(self, is_enabled):

37 if is_enabled:

38 if os.path.exists(_PAUSE_FILE):

39 os.remove(_PAUSE_FILE)

40 else:

41 utils.system('touch ' + _PAUSE_FILE)

42

43

44 def _reset_rate_limiting(self):

45 utils.system('rm -rf ' + _CRASH_SENDER_RATE_DIR)

46

47

48 def _clear_spooled_crashes(self):

49 utils.system('rm -rf ' + _SYSTEM_CRASH_DIR)

50 utils.system('rm -rf ' + _USER_CRASH_DIR)

51

52

53 def _kill_running_sender(self):

54 if not os.path.exists(_CRASH_SENDER_RUN_PATH):

55 return

56 running_pid = int(utils.read_file(_CRASH_SENDER_RUN_PATH))

57 logging.warning('Detected running crash sender (%d), killing' %

58 running_pid)

59 utils.system('kill -9 %d' % running_pid)

60 os.remove(_CRASH_SENDER_RUN_PATH)

61

62

63 def _set_sending_mock(self, mock_enabled, send_success=True):

64 if mock_enabled:

65 if send_success:

66 data = ''

67 else:

68 data = '1'

69 logging.info('Setting sending mock')

70 utils.open_write_close(_MOCK_CRASH_SENDING, data)

71 else:

72 utils.system('rm -f ' + _MOCK_CRASH_SENDING)

73

74

75 def _set_consent(self, has_consent):

76 if has_consent:

77 utils.open_write_close(_CONSENT_FILE, 'test-consent')

78 logging.info('Created ' + _CONSENT_FILE)

79 else:

80 utils.system('rm -f "%s"' % (_CONSENT_FILE))

81

82

83 def _get_pushed_consent_file_path(self):

84 return os.path.join(self.bindir, 'pushed_consent')

85

86

87 def _push_consent(self):

88 if os.path.exists(_CONSENT_FILE):

89 os.rename(_CONSENT_FILE, self._get_pushed_consent_file_path())

90

91

92 def _pop_consent(self):

93 self._set_consent(False)

94 if os.path.exists(self._get_pushed_consent_file_path()):

95 os.rename(self._get_pushed_consent_file_path(), _CONSENT_FILE)

96

97

98 def _get_crash_dir(self, username):

99 if username == 'chronos':

100 return _USER_CRASH_DIR

101 else:

102 return _SYSTEM_CRASH_DIR

103

104

105 def _initialize_crash_reporter(self):

106 utils.system('%s --init --nounclean_check' % _CRASH_REPORTER_PATH)

107

108

109 def initialize(self):

110 test.test.initialize(self)

111 self._log_reader = site_log_reader.LogReader()

112

113

114 def cleanup(self):

115 self._reset_rate_limiting()

116 self._clear_spooled_crashes()

117 test.test.cleanup(self)

118

119

120 def _create_fake_crash_dir_entry(self, name):

121 entry = os.path.join(_SYSTEM_CRASH_DIR, name)

122 if not os.path.exists(_SYSTEM_CRASH_DIR):

123 os.makedirs(_SYSTEM_CRASH_DIR)

124 utils.system('touch ' + entry)

125 return entry

126

127

128 def _prepare_sender_one_crash(self,

129 send_success,

130 reports_enabled,

131 username,

132 minidump):

133 self._set_sending_mock(mock_enabled=True, send_success=send_success)

134 self._set_consent(reports_enabled)

135 if minidump is None:

136 minidump = self._create_fake_crash_dir_entry('fake.dmp')

137 return minidump

138

139

140 def _parse_sender_output(self, output):

141 """Parse the log output from the crash_sender script.

142

143 This script can run on the logs from either a mocked or true

144 crash send.

145

146 Args:

147 output: output from the script

148

149 Returns:

150 A dictionary with these values:

151 send_attempt: did the script attempt to send a crash.

152 send_success: if it attempted, was the crash send successful.

153 sleep_time: if it attempted, how long did it sleep before

154 sending (if mocked, how long would it have slept)

155 output: the output from the script, copied

156 """

157 sleep_match = re.search('Scheduled to send in (\d+)s', output)

158 send_attempt = sleep_match is not None

159 if send_attempt:

160 sleep_time = int(sleep_match.group(1))

161 else:

162 sleep_time = None

163 send_success = 'Mocking successful send' in output

164 return {'send_attempt': send_attempt,

165 'send_success': send_success,

166 'sleep_time': sleep_time,

167 'output': output}

168

169

170 def _call_sender_one_crash(self,

171 send_success=True,

172 reports_enabled=True,

173 username='root',

174 minidump=None):

175 """Call the crash sender script to mock upload one crash.

176

177 Args:

178 send_success: Mock a successful send if true

179 reports_enabled: Has the user consented to sending crash reports.

180 username: user to emulate a crash from

181 minidump: minidump to use for crash, if None we create one.

182

183 Returns:

184 Returns a dictionary describing the result with the keys

185 from _parse_sender_output, as well as:

186 minidump_exists: does the minidump still exist after calling

187 send script

188 rate_count: how many crashes have been uploaded in the past

189 24 hours.

190 """

191 minidump = self._prepare_sender_one_crash(send_success,

192 reports_enabled,

193 username,

194 minidump)

195 self._log_reader.set_start_by_current()

196 script_output = utils.system_output(

197 '/bin/sh -c "%s" 2>&1' % _CRASH_SENDER_PATH,

198 ignore_status=True)

199 # Wait for up to 2s for no crash_sender to be running,

200 # otherwise me might get only part of the output.

201 site_utils.poll_for_condition(

202 lambda: utils.system('pgrep crash_sender',

203 ignore_status=True) != 0,

204 timeout=2,

205 exception=error.TestError(

206 'Timeout waiting for crash_sender to finish: ' +

207 self._log_reader.get_logs()))

208

209 output = self._log_reader.get_logs()

210 logging.debug('Crash sender message output:\n' + output)

211 if script_output != '':

212 raise error.TestFail(

213 'Unexpected crash_sender stdout/stderr: ' + script_output)

214

215 if os.path.exists(minidump):

216 minidump_exists = True

217 os.remove(minidump)

218 else:

219 minidump_exists = False

220 if os.path.exists(_CRASH_SENDER_RATE_DIR):

221 rate_count = len(os.listdir(_CRASH_SENDER_RATE_DIR))

222 else:

223 rate_count = 0

224

225 result = self._parse_sender_output(output)

226 result['minidump_exists'] = minidump_exists

227 result['rate_count'] = rate_count

228

229 # Show the result for debugging but remove 'output' key

230 # since it's large and earlier in debug output.

231 debug_result = dict(result)

232 del debug_result['output']

233 logging.debug('Result of send (besides output): %s' % debug_result)

234

235 return result

236

237

238 def _test_reporter_startup(self):	23 def _test_reporter_startup(self):

239 """Test that the core_pattern is set up by crash reporter."""	24 """Test that the core_pattern is set up by crash reporter."""

240 output = utils.read_file(_CORE_PATTERN).rstrip()	25 output = utils.read_file(_CORE_PATTERN).rstrip()

241 expected_core_pattern = ('\|%s --signal=%%s --pid=%%p' %	26 expected_core_pattern = ('\|%s --signal=%%s --pid=%%p' %

242 _CRASH_REPORTER_PATH)	27 self._CRASH_REPORTER_PATH)

243 if output != expected_core_pattern:	28 if output != expected_core_pattern:

244 raise error.TestFail('core pattern should have been %s, not %s' %	29 raise error.TestFail('core pattern should have been %s, not %s' %

245 (expected_core_pattern, output))	30 (expected_core_pattern, output))

246	31

247 self._log_reader.set_start_by_reboot(-1)	32 self._log_reader.set_start_by_reboot(-1)

248	33

249 if not self._log_reader.can_find('Enabling crash handling'):	34 if not self._log_reader.can_find('Enabling crash handling'):

250 raise error.TestFail(	35 raise error.TestFail(

251 'user space crash handling was not started during last boot')	36 'user space crash handling was not started during last boot')

252	37

253	38

254 def _test_reporter_shutdown(self):	39 def _test_reporter_shutdown(self):

255 """Test the crash_reporter shutdown code works."""	40 """Test the crash_reporter shutdown code works."""

256 self._log_reader.set_start_by_current()	41 self._log_reader.set_start_by_current()

257 utils.system('%s --clean_shutdown' % _CRASH_REPORTER_PATH)	42 utils.system('%s --clean_shutdown' % self._CRASH_REPORTER_PATH)

258 output = utils.read_file(_CORE_PATTERN).rstrip()	43 output = utils.read_file(_CORE_PATTERN).rstrip()

259 if output != 'core':	44 if output != 'core':

260 raise error.TestFail('core pattern should have been core, not %s' %	45 raise error.TestFail('core pattern should have been core, not %s' %

261 output)	46 output)

262	47

263	48

264 def _test_sender_simple(self):

265 """Test sending a single crash."""

266 self._set_sending(True)

267 result = self._call_sender_one_crash()

268 if (result['minidump_exists'] or

269 result['rate_count'] != 1 or

270 not result['send_attempt'] or

271 not result['send_success'] or

272 result['sleep_time'] < 0 or

273 result['sleep_time'] >= _SECONDS_SEND_SPREAD):

274 raise error.TestFail('Simple send failed')

275

276

277 def _test_sender_pausing(self):

278 """Test the sender returns immediately when the pause file is present.

279

280 This is testing the sender's test functionality - if this regresses,

281 other tests can become flaky because the cron-started sender may run

282 asynchronously to these tests."""

283 self._set_sending(False)

284 result = self._call_sender_one_crash()

285 if (not result['minidump_exists'] or

286 not 'Exiting early due to' in result['output'] or

287 result['send_attempt']):

288 raise error.TestFail('Sender did not pause')

289

290

291 def _test_sender_reports_disabled(self):

292 """Test that when reporting is disabled, we don't send."""

293 self._set_sending(True)

294 result = self._call_sender_one_crash(reports_enabled=False)

295 if (result['minidump_exists'] or

296 not 'Uploading is disabled' in result['output'] or

297 result['send_attempt']):

298 raise error.TestFail('Sender did not handle reports disabled')

299

300

301 def _test_sender_rate_limiting(self):

302 """Test the sender properly rate limits and sends with delay."""

303 self._set_sending(True)

304 sleep_times = []

305 for i in range(1, _DAILY_RATE_LIMIT + 1):

306 result = self._call_sender_one_crash()

307 if not result['send_attempt'] or not result['send_success']:

308 raise error.TestFail('Crash uploader did not send on #%d' % i)

309 if result['rate_count'] != i:

310 raise error.TestFail('Did not properly persist rate on #%d' % i)

311 sleep_times.append(result['sleep_time'])

312 logging.debug('Sleeps between sending crashes were: %s' % sleep_times)

313 unique_times = {}

314 for i in range(0, _DAILY_RATE_LIMIT):

315 unique_times[sleep_times[i]] = True

316 if len(unique_times) < _MIN_UNIQUE_TIMES:

317 raise error.TestFail('Expected at least %d unique times: %s' %

318 _MIN_UNIQUE_TIMES, sleep_times)

319 # Now the _DAILY_RATE_LIMIT ^ th send request should fail.

320 result = self._call_sender_one_crash()

321 if (not result['minidump_exists'] or

322 not 'Cannot send more crashes' in result['output'] or

323 result['rate_count'] != _DAILY_RATE_LIMIT):

324 raise error.TestFail('Crash rate limiting did not take effect')

325

326 # Set one rate file a day earlier and verify can send

327 rate_files = os.listdir(_CRASH_SENDER_RATE_DIR)

328 rate_path = os.path.join(_CRASH_SENDER_RATE_DIR, rate_files[0])

329 statinfo = os.stat(rate_path)

330 os.utime(rate_path, (statinfo.st_atime,

331 statinfo.st_mtime - (60 * 60 * 25)))

332 utils.system('ls -l ' + _CRASH_SENDER_RATE_DIR)

333 result = self._call_sender_one_crash()

334 if (not result['send_attempt'] or

335 not result['send_success'] or

336 result['rate_count'] != _DAILY_RATE_LIMIT):

337 raise error.TestFail('Crash not sent even after 25hrs pass')

338

339

340 def _test_sender_single_instance(self):

341 """Test the sender fails to start when another instance is running.

342

343 Here we rely on the sender not checking the other running pid

344 is of the same instance.

345 """

346 self._set_sending(True)

347 utils.open_write_close(_CRASH_SENDER_RUN_PATH, str(os.getpid()))

348 result = self._call_sender_one_crash()

349 if (not 'Already running.' in result['output'] or

350 result['send_attempt'] or not result['minidump_exists']):

351 raise error.TestFail('Allowed multiple instances to run')

352 os.remove(_CRASH_SENDER_RUN_PATH)

353

354

355 def _test_sender_send_fails(self):

356 """Test that when the send fails we try again later."""

357 self._set_sending(True)

358 result = self._call_sender_one_crash(send_success=False)

359 if not result['send_attempt'] or result['send_success']:

360 raise error.TestError('Did not properly cause a send failure')

361 if result['rate_count'] != 1:

362 raise error.TestFail('Did not count a failed send against rate '

363 'limiting')

364 if not result['minidump_exists']:

365 raise error.TestFail('Expected minidump to be saved for later '

366 'sending')

367

368

369 def _test_sender_leaves_core_files(self):

370 """Test that a core file is left in the send directory.

371

372 Core files will only persist for developer/testing images. We

373 should never remove such a file."""

374 self._set_sending(True)

375 # Call prepare function to make sure the directory exists.

376 core_name = 'something.ending.with.core'

377 core_path = self._create_fake_crash_dir_entry(core_name)

378 result = self._call_sender_one_crash()

379 if not 'Ignoring core file.' in result['output']:

380 raise error.TestFail('Expected ignoring core file message')

381 if not os.path.exists(core_path):

382 raise error.TestFail('Core file was removed')

383

384

385 def _test_cron_runs(self):

386 """Test sender runs successfully as part of the hourly cron job.

387

388 Assuming we've run test_sender_simple which shows that a minidump

389 gets removed as part of sending, we run the cron job (which is

390 asynchronous) and wait for that file to be removed to just verify

391 the job eventually runs the sender."""

392 self._set_sending(True)

393 minidump = self._prepare_sender_one_crash(send_success=True,

394 reports_enabled=True,

395 username='root',

396 minidump=None)

397 if not os.path.exists(minidump):

398 raise error.TestError('minidump not created')

399 utils.system(_CRASH_SENDER_CRON_PATH)

400 self._log_reader.set_start_by_current()

401 site_utils.poll_for_condition(

402 lambda: not os.path.exists(minidump),

403 desc='minidump to be removed')

404 crash_sender_log = self._log_reader.get_logs()

405 logging.debug('Contents of crash sender log: ' + crash_sender_log)

406 result = self._parse_sender_output(crash_sender_log)

407 if not result['send_attempt'] or not result['send_success']:

408 raise error.TestFail('Cron simple run test failed')

409

410

411 def _prepare_crasher(self):	49 def _prepare_crasher(self):

412 """Extract the crasher and set its permissions.	50 """Extract the crasher and set its permissions.

413	51

414 crasher is only gzipped to subvert Portage stripping.	52 crasher is only gzipped to subvert Portage stripping.

415 """	53 """

416 self._crasher_path = {	54 self._crasher_path = {

417 True: os.path.join(self.srcdir, 'crasher_breakpad'),	55 True: os.path.join(self.srcdir, 'crasher_breakpad'),

418 False: os.path.join(self.srcdir, 'crasher_nobreakpad')	56 False: os.path.join(self.srcdir, 'crasher_nobreakpad')

419 }	57 }

420 utils.system('cd %s; tar xzf crasher.tgz' %	58 utils.system('cd %s; tar xzf crasher.tgz' %

(...skipping 334 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
755 os.system('mount -norw,remount /')	393 os.system('mount -norw,remount /')

756 if os.path.exists(_LEAVE_CORE_PATH):	394 if os.path.exists(_LEAVE_CORE_PATH):

757 os.remove(_LEAVE_CORE_PATH)	395 os.remove(_LEAVE_CORE_PATH)

758 self._check_core_file_persisting(False)	396 self._check_core_file_persisting(False)

759	397

760	398

761 # TODO(kmixter): Test crashing a process as ntp or some other	399 # TODO(kmixter): Test crashing a process as ntp or some other

762 # non-root, non-chronos user.	400 # non-root, non-chronos user.

763	401

764 def run_once(self):	402 def run_once(self):

765 test_names = [	403 self.run_crash_tests([

766 'reporter_startup',	404 'reporter_startup',

767 'reporter_shutdown',	405 'reporter_shutdown',

768 'sender_simple',

769 'sender_pausing',

770 'sender_reports_disabled',

771 'sender_rate_limiting',

772 'sender_single_instance',

773 'sender_send_fails',

774 'sender_leaves_core_files',

775 'cron_runs',

776 'no_crash',	406 'no_crash',

777 'chronos_breakpad_crasher',	407 'chronos_breakpad_crasher',

778 'chronos_nobreakpad_crasher',	408 'chronos_nobreakpad_crasher',

779 'root_breakpad_crasher',	409 'root_breakpad_crasher',

780 'root_nobreakpad_crasher',	410 'root_nobreakpad_crasher',

781 'core_file_removed_in_production',	411 'core_file_removed_in_production',

782 'core_file_persists_in_debug',	412 'core_file_persists_in_debug',

783 ]	413 ])

784

785 self._push_consent()

786

787 # Sanity check test_names is complete

788 for attr in dir(self):

789 if attr.find('_test_') == 0:

790 test_name = attr[6:]

791 if not test_name in test_names:

792 raise error.TestError('Test %s is missing' % test_name)

793

794 for test_name in test_names:

795 logging.info(('=' * 20) + ('Running %s' % test_name) + ('=' * 20))

796 self._initialize_crash_reporter()

797 self._kill_running_sender()

798 self._reset_rate_limiting()

799 self._clear_spooled_crashes()

800 self._set_sending(False)

801 getattr(self, '_test_' + test_name)()

802

803

804 def cleanup(self):

805 self._set_sending(True)

806 self._set_sending_mock(mock_enabled=False)

807 self._pop_consent()

OLD	NEW