#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs each test case as a single shard, single process execution.

Similar to sharding_supervisor.py but finer grained. It runs each test case
individually instead of running per shard. Runs multiple instances in parallel.
"""

import datetime
import fnmatch
import json
import logging
import optparse
import os
import random
import re
import subprocess
import sys
import threading
import time
from xml.dom import minidom
import xml.parsers.expat

# Directory with this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Root of a repository.
ROOT_DIR = os.path.dirname(BASE_DIR)
# Name of the optional package with all dependencies.
DEPENDENCIES_ZIP = os.path.join(BASE_DIR, 'run_isolated.zip')

# When running in an isolated environment, the dependencies are in the zipped
# package.
if os.path.exists(DEPENDENCIES_ZIP):
  sys.path.insert(0, DEPENDENCIES_ZIP)
else:
  # Otherwise they are in the root of the repository.
  if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)


from utils import threading_utils
from utils import tools


# These are known to influence the way the output is generated.
KNOWN_GTEST_ENV_VARS = [
  'GTEST_ALSO_RUN_DISABLED_TESTS',
  'GTEST_BREAK_ON_FAILURE',
  'GTEST_CATCH_EXCEPTIONS',
  'GTEST_COLOR',
  'GTEST_FILTER',
  'GTEST_OUTPUT',
  'GTEST_PRINT_TIME',
  'GTEST_RANDOM_SEED',
  'GTEST_REPEAT',
  'GTEST_SHARD_INDEX',
  'GTEST_SHARD_STATUS_FILE',
  'GTEST_SHUFFLE',
  'GTEST_THROW_ON_FAILURE',
  'GTEST_TOTAL_SHARDS',
]

# These need to be popped out before running a test.
GTEST_ENV_VARS_TO_REMOVE = [
  'GTEST_ALSO_RUN_DISABLED_TESTS',
  'GTEST_FILTER',
  'GTEST_OUTPUT',
  'GTEST_RANDOM_SEED',
  # TODO(maruel): Handle.
  'GTEST_REPEAT',
  'GTEST_SHARD_INDEX',
  # TODO(maruel): Handle.
  'GTEST_SHUFFLE',
  'GTEST_TOTAL_SHARDS',
]


RUN_PREFIX = '[ RUN      ] '
OK_PREFIX = '[       OK ] '
FAILED_PREFIX = '[  FAILED  ] '


if subprocess.mswindows:
  import msvcrt  # pylint: disable=F0401
  from ctypes import wintypes
  from ctypes import windll

  def ReadFile(handle, desired_bytes):
    """Calls kernel32.ReadFile()."""
    c_read = wintypes.DWORD()
    buff = wintypes.create_string_buffer(desired_bytes+1)
    windll.kernel32.ReadFile(
        handle, buff, desired_bytes, wintypes.byref(c_read), None)
    # NULL terminate it.
    buff[c_read.value] = '\x00'
    return wintypes.GetLastError(), buff.value

  def PeekNamedPipe(handle):
    """Calls kernel32.PeekNamedPipe(). Simplified version."""
    c_avail = wintypes.DWORD()
    c_message = wintypes.DWORD()
    success = windll.kernel32.PeekNamedPipe(
        handle, None, 0, None, wintypes.byref(c_avail),
        wintypes.byref(c_message))
    if not success:
      raise OSError(wintypes.GetLastError())
    return c_avail.value

  def recv_multi_impl(conns, maxsize, timeout):
    """Reads from the first available pipe.

    If timeout is None, it's blocking. If timeout is 0, it is not blocking.
    """
    # TODO(maruel): Use WaitForMultipleObjects(). Python creates anonymous
    # pipes for proc.stdout and proc.stderr but they are implemented as named
    # pipes on Windows. Since named pipes are not waitable objects, they can't
    # be passed as-is to WFMO(). So this means N times CreateEvent(), N times
    # ReadFile() and finally WFMO(). This requires caching the event handles
    # in the Popen object and remembering the pending ReadFile() calls. This
    # will require some re-architecture.
    maxsize = max(maxsize or 16384, 1)
    if timeout:
      start = time.time()
    handles = [msvcrt.get_osfhandle(conn.fileno()) for conn in conns]
    while handles:
      for i, handle in enumerate(handles):
        try:
          avail = min(PeekNamedPipe(handle), maxsize)
          if avail:
            return i, ReadFile(handle, avail)[1]
          if (timeout and (time.time() - start) >= timeout) or timeout == 0:
            return None, None
          # Polling rocks.
          time.sleep(0.001)
        except OSError:
          handles.pop(i)
          break
    # Nothing to wait for.
    return None, None

else:
  import fcntl  # pylint: disable=F0401
  import select

  def recv_multi_impl(conns, maxsize, timeout):
    """Reads from the first available pipe.

    If timeout is None, it's blocking. If timeout is 0, it is not blocking.
    """
    try:
      r, _, _ = select.select(conns, [], [], timeout)
    except select.error:
      return None, None
    if not r:
      return None, None

    conn = r[0]
    # Temporarily make it non-blocking.
    flags = fcntl.fcntl(conn, fcntl.F_GETFL)
    if not conn.closed:
      # pylint: disable=E1101
      fcntl.fcntl(conn, fcntl.F_SETFL, flags | os.O_NONBLOCK)
    try:
      data = conn.read(max(maxsize or 16384, 1))
      return conns.index(conn), data
    finally:
      if not conn.closed:
        fcntl.fcntl(conn, fcntl.F_SETFL, flags)


class Failure(Exception):
  pass


class Popen(subprocess.Popen):
  """Adds timeout support on stdout and stderr.

  Inspired by
  http://code.activestate.com/recipes/440554-module-to-allow-asynchronous-subprocess-use-on-win/
  """
  def __init__(self, *args, **kwargs):
    self.start = time.time()
    self.end = None
    super(Popen, self).__init__(*args, **kwargs)

  def duration(self):
    """Duration of the child process.

    It is greater or equal to the actual time the child process ran. It can
    be significantly higher than the real value if neither .wait() nor
    .poll() was used.
    """
    return (self.end or time.time()) - self.start

  def wait(self):
    ret = super(Popen, self).wait()
    if not self.end:
      # communicate() uses wait() internally.
      self.end = time.time()
    return ret

  def poll(self):
    ret = super(Popen, self).poll()
    if ret is not None and not self.end:
      self.end = time.time()
    return ret

  def yield_any(self, timeout=None):
    """Yields output until the process terminates or is killed by a timeout.

    Yielded values are in the form (pipename, data).

    If timeout is None, it is blocking. If timeout is 0, it doesn't block. It
    is generally not useful to use timeout=0.
    """
    remaining = 0
    while self.poll() is None:
      if timeout:
        # While these float() calls seem redundant, they are to force
        # ResetableTimeout to "render" itself into a float. At each call, the
        # resulting value could be different, depending if a .reset() call
        # occurred.
        remaining = max(float(timeout) - self.duration(), 0.001)
      else:
        remaining = timeout
      t, data = self.recv_any(timeout=remaining)
      if data or timeout == 0:
        yield (t, data)
      if timeout and self.duration() >= float(timeout):
        break
    if self.poll() is None and timeout and self.duration() >= float(timeout):
      logging.debug('Kill %s %s', self.duration(), float(timeout))
      self.kill()
    self.wait()
    # Read all remaining output in the pipes.
    while True:
      t, data = self.recv_any()
      if not data:
        break
      yield (t, data)

  def recv_any(self, maxsize=None, timeout=None):
    """Reads from stderr and if empty, from stdout.

    If timeout is None, it is blocking. If timeout is 0, it doesn't block.
    """
    pipes = [
      x for x in ((self.stderr, 'stderr'), (self.stdout, 'stdout')) if x[0]
    ]
    if len(pipes) == 2 and self.stderr.fileno() == self.stdout.fileno():
      pipes.pop(0)
    if not pipes:
      return None, None
    conns, names = zip(*pipes)
    index, data = recv_multi_impl(conns, maxsize, timeout)
    if index is None:
      return index, data
    if not data:
      self._close(names[index])
      return None, None
    if self.universal_newlines:
      data = self._translate_newlines(data)
    return names[index], data

  def recv_out(self, maxsize=None, timeout=None):
    """Reads from stdout asynchronously."""
    return self._recv('stdout', maxsize, timeout)

  def recv_err(self, maxsize=None, timeout=None):
    """Reads from stderr asynchronously."""
    return self._recv('stderr', maxsize, timeout)

  def _close(self, which):
    getattr(self, which).close()
    setattr(self, which, None)

  def _recv(self, which, maxsize, timeout):
    conn = getattr(self, which)
    if conn is None:
      return None
    # recv_multi_impl() returns an (index, data) tuple; only the data matters
    # here since a single pipe is passed in.
    _, data = recv_multi_impl([conn], maxsize, timeout)
    if not data:
      return self._close(which)
    if self.universal_newlines:
      data = self._translate_newlines(data)
    return data


def call_with_timeout(cmd, timeout, **kwargs):
  """Runs an executable with an optional timeout.

  timeout 0 or None disables the timeout.
  """
  proc = Popen(
      cmd,
      stdin=subprocess.PIPE,
      stdout=subprocess.PIPE,
      **kwargs)
  if timeout:
    out = ''
    err = ''
    for t, data in proc.yield_any(timeout):
      if t == 'stdout':
        out += data
      else:
        err += data
  else:
    # This code path is much faster.
    out, err = proc.communicate()
  return out, err, proc.returncode, proc.duration()

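# Illustrative usage sketch of call_with_timeout(), with a hypothetical
# command; the extra keyword arguments are forwarded to Popen():
#
#   out, err, returncode, duration = call_with_timeout(
#       ['./dummy_executable', '--flag'], timeout=5., stderr=subprocess.PIPE)
#
# With a non-zero timeout the output is drained through yield_any() and the
# child is killed once the timeout expires; with timeout=0 or None it falls
# back to the faster communicate() path.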

def setup_gtest_env():
  """Copies the environment variables and prepares them to run a gtest."""
  env = os.environ.copy()
  for name in GTEST_ENV_VARS_TO_REMOVE:
    env.pop(name, None)

  # Forcibly enable color by default, if not already disabled.
  env.setdefault('GTEST_COLOR', 'on')

  return env


def gtest_list_tests(cmd, cwd):
  """Lists all the test cases for a google-test executable.

  See more info at http://code.google.com/p/googletest/.
  """
  cmd = cmd[:]
  cmd.append('--gtest_list_tests')
  env = setup_gtest_env()
  timeout = 0.
  try:
    out, err, returncode, _ = call_with_timeout(
        cmd,
        timeout,
        stderr=subprocess.PIPE,
        env=env,
        cwd=cwd)
  except OSError, e:
    raise Failure('Failed to run %s\ncwd=%s\n%s' % (' '.join(cmd), cwd, str(e)))
  if returncode:
    raise Failure(
        'Failed to run %s\nstdout:\n%s\nstderr:\n%s' %
        (' '.join(cmd), out, err), returncode)
  # pylint: disable=E1103
  if err and not err.startswith('Xlib:  extension "RANDR" missing on display '):
    logging.error('Unexpected spew in gtest_list_tests:\n%s\n%s', err, cmd)
  return out


def filter_shards(tests, index, shards):
  """Filters the shards.

  Watch out for integer-based arithmetic.
  """
  # The following code could be made more terse but I liked the extra clarity.
  assert 0 <= index < shards
  total = len(tests)
  quotient, remainder = divmod(total, shards)
  # 1 item of each remainder is distributed over the first 0:remainder shards.
  # For example, with total == 5, index == 1, shards == 3
  # min_bound == 2, max_bound == 4.
  min_bound = quotient * index + min(index, remainder)
  max_bound = quotient * (index + 1) + min(index + 1, remainder)
  return tests[min_bound:max_bound]

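# Illustrative sketch of the shard math above. With total == 5 and
# shards == 3, quotient == 1 and remainder == 2, so:
#   index 0 -> tests[0:2]
#   index 1 -> tests[2:4]
#   index 2 -> tests[4:5]
# The remainder is spread over the first shards, so shard sizes differ by at
# most one.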

def _starts_with(a, b, prefix):
  return a.startswith(prefix) or b.startswith(prefix)


def is_valid_test_case(test, disabled):
  """Returns False on malformed or DISABLED_ test cases."""
  if '.' not in test:
    logging.error('Ignoring unknown test %s', test)
    return False
  fixture, case = test.split('.', 1)
  if not disabled and _starts_with(fixture, case, 'DISABLED_'):
    return False
  return True


def filter_bad_tests(tests, disabled):
  """Filters out malformed or DISABLED_ test cases."""
  return [test for test in tests if is_valid_test_case(test, disabled)]


def chromium_is_valid_test_case(test, disabled, fails, flaky, pre, manual):
  """Returns False on Chromium-specific bad tests, in addition to the checks
  done by is_valid_test_case().

  FAILS_, FLAKY_, PRE_, MANUAL_ and other weird Chromium-specific test cases.
  """
  if not is_valid_test_case(test, disabled):
    return False
  fixture, case = test.split('.', 1)
  if not fails and _starts_with(fixture, case, 'FAILS_'):
    return False
  if not flaky and _starts_with(fixture, case, 'FLAKY_'):
    return False
  if not pre and _starts_with(fixture, case, 'PRE_'):
    return False
  if not manual and _starts_with(fixture, case, 'MANUAL_'):
    return False
  if test == 'InProcessBrowserTest.Empty':
    return False
  return True


def chromium_filter_bad_tests(tests, disabled, fails, flaky, pre, manual):
  """Filters out Chromium-specific bad tests in addition to
  filter_bad_tests().

  Filters out FAILS_, FLAKY_, PRE_, MANUAL_ and other weird Chromium-specific
  test cases.
  """
  return [
    test for test in tests if chromium_is_valid_test_case(
        test, disabled, fails, flaky, pre, manual)
  ]


def chromium_filter_pre_tests(test_case_results):
  """Filters out PRE_ test case results."""
  return (
      i for i in test_case_results if chromium_is_valid_test_case(
          i['test_case'],
          disabled=True,
          fails=True,
          flaky=True,
          pre=False,
          manual=True))


def parse_gtest_cases(out, seed):
  """Returns the flattened list of test cases in the executable.

  The returned list is sorted so it is not dependent on the order of the
  linked objects. Then |seed| is applied to deterministically shuffle the
  list if |seed| is a positive value. The rationale is that the probability
  of two test cases stomping on each other when run simultaneously is high
  for test cases in the same fixture. By shuffling the tests, the probability
  of these badly written tests running simultaneously, let alone being in the
  same shard, is lower.

  Expected format is a concatenation of this:
  TestFixture1
    TestCase1
    TestCase2
  """
  tests = []
  fixture = None
  lines = out.splitlines()
  while lines:
    line = lines.pop(0)
    if not line:
      break
    if not line.startswith(' '):
      fixture = line
    else:
      case = line[2:]
      if case.startswith('YOU HAVE'):
        # It's a 'YOU HAVE foo bar' line. We're done.
        break
      assert ' ' not in case
      tests.append(fixture + case)
  tests = sorted(tests)
  if seed:
    # Sadly, python's random module doesn't permit local seeds.
    state = random.getstate()
    try:
      # This is totally deterministic.
      random.seed(seed)
      random.shuffle(tests)
    finally:
      random.setstate(state)
  return tests

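# Illustrative sketch of the parsing above, given this hypothetical
# --gtest_list_tests output (fixture lines end with a '.', test cases are
# indented by two spaces):
#
#   Foo.
#     Bar
#     Baz
#   Quux.
#     Corge
#
# parse_gtest_cases() returns ['Foo.Bar', 'Foo.Baz', 'Quux.Corge'], sorted,
# then deterministically shuffled when |seed| is non-zero.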

def list_test_cases(cmd, cwd, index, shards, seed, disabled):
  """Returns the list of test cases according to the specified criteria."""
  tests = parse_gtest_cases(gtest_list_tests(cmd, cwd), seed)

  # TODO(maruel): Splitting shards before filtering bad test cases could
  # result in unbalanced shards.
  if shards:
    tests = filter_shards(tests, index, shards)
  return filter_bad_tests(tests, disabled)


def chromium_list_test_cases(
    cmd, cwd, index, shards, seed, disabled, fails, flaky, pre, manual):
  """Returns the list of test cases according to the specified criteria."""
  tests = list_test_cases(cmd, cwd, index, shards, seed, disabled)
  return chromium_filter_bad_tests(tests, disabled, fails, flaky, pre, manual)


class RunSome(object):
  """Thread-safe object deciding if testing should continue."""
  def __init__(
      self, expected_count, retries, min_failures, max_failure_ratio,
      max_failures):
    """Determines if it is better to give up testing after an amount of
    failures and successes.

    Arguments:
    - expected_count is the expected number of elements to run.
    - retries is how many times a failing element can be retried. retries
      should be set to the maximum number of retries per failure. This
      permits dampening the curve to determine the threshold where to stop.
    - min_failures is the minimal number of failures to tolerate, to put a
      lower limit when expected_count is small. This value is multiplied by
      the number of retries.
    - max_failure_ratio is the ratio of permitted failures, e.g. 0.1 to stop
      after 10% of failed test cases.
    - max_failures is the absolute maximum number of tolerated failures, or
      None.

    For large values of expected_count, the number of tolerated failures will
    be at maximum "(expected_count * retries) * max_failure_ratio".

    For small values of expected_count, the number of tolerated failures will
    be at least "min_failures * retries".
    """
    assert 0 < expected_count
    assert 0 <= retries < 100
    assert 0 <= min_failures
    assert 0. < max_failure_ratio < 1.
    # Constants.
    self._expected_count = expected_count
    self._retries = retries
    self._min_failures = min_failures
    self._max_failure_ratio = max_failure_ratio

    self._min_failures_tolerated = self._min_failures * (self._retries + 1)
    # Pre-calculate the maximum number of allowable failures. Note that
    # _max_failures can be lower than _min_failures.
    self._max_failures_tolerated = round(
        (expected_count * (retries + 1)) * max_failure_ratio)
    if max_failures is not None:
      # Override the ratio if necessary.
      self._max_failures_tolerated = min(
          self._max_failures_tolerated, max_failures)
      self._min_failures_tolerated = min(
          self._min_failures_tolerated, max_failures)

    # Variables.
    self._lock = threading.Lock()
    self._passed = 0
    self._failures = 0
    self.stopped = False

  def should_stop(self):
    """Stops once a threshold was reached. This includes retries."""
    with self._lock:
      if self.stopped:
        return True
      # Accept at least the minimum number of failures.
      if self._failures <= self._min_failures_tolerated:
        return False
      if self._failures >= self._max_failures_tolerated:
        self.stopped = True
      return self.stopped

  def got_result(self, passed):
    with self._lock:
      if passed:
        self._passed += 1
      else:
        self._failures += 1

  def __str__(self):
    return '%s(%d, %d, %d, %.3f)' % (
        self.__class__.__name__,
        self._expected_count,
        self._retries,
        self._min_failures,
        self._max_failure_ratio)

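# Illustrative sketch of the RunSome thresholds, with hypothetical values
# RunSome(expected_count=500, retries=2, min_failures=2,
# max_failure_ratio=0.1, max_failures=None):
#   _min_failures_tolerated == 2 * (2 + 1) == 6
#   _max_failures_tolerated == round((500 * 3) * 0.1) == 150
# should_stop() keeps returning False while at most 6 failures accumulated,
# and flips to True once the failure count (retries included) reaches 150.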

class RunAll(object):
  """Never fails."""
  stopped = False

  @staticmethod
  def should_stop():
    return False

  @staticmethod
  def got_result(_):
    pass


def process_output(lines, test_cases):
  """Yields the data of each test case.

  Expects the test cases to be run in the order of the list.

  Handles the following google-test behaviors:
  - A test case crash causing only a partial number of test cases to be run.
  - An invalid test case name, so the test case wasn't run at all.

  This function automatically distributes the startup cost across each test
  case.
  """
  test_cases = test_cases[:]
  test_case = None
  test_case_data = None
  # Accumulates the junk between test cases.
  accumulation = ''
  eat_last_lines = False

  for line in lines:
    if eat_last_lines:
      test_case_data['output'] += line
      continue

    i = line.find(RUN_PREFIX)
    if i > 0 and test_case_data:
      # This may occur specifically in browser_tests, because the test case
      # is run in a child process. If the child process doesn't terminate its
      # output with a LF, it may cause the "[ RUN      ]" line to be
      # improperly printed out in the middle of a line.
      test_case_data['output'] += line[:i]
      line = line[i:]
      i = 0
    if i >= 0:
      if test_case:
        # The previous test case had crashed. No idea about its duration.
        test_case_data['returncode'] = 1
        test_case_data['duration'] = 0
        test_case_data['crashed'] = True
        yield test_case_data

      test_case = line[len(RUN_PREFIX):].strip().split(' ', 1)[0]
      # Accept the test case even if it was unexpected.
      if test_case in test_cases:
        test_cases.remove(test_case)
      else:
        logging.warning('Unexpected test case: %s', test_case)
      test_case_data = {
        'test_case': test_case,
        'returncode': None,
        'duration': None,
        'output': accumulation + line,
      }
      accumulation = ''

    elif test_case:
      test_case_data['output'] += line
      i = line.find(OK_PREFIX)
      if i >= 0:
        result = 0
        line = line[i + len(OK_PREFIX):]
      else:
        i = line.find(FAILED_PREFIX)
        if i >= 0:
          line = line[i + len(FAILED_PREFIX):]
          result = 1
      if i >= 0:
        # The test completed. It's important to make sure the test case name
        # matches too, since it could be a fake output.
        if line.startswith(test_case):
          line = line[len(test_case):]
          match = re.search(r' \((\d+) ms\)', line)
          if match:
            test_case_data['duration'] = float(match.group(1)) / 1000.
          else:
            # Make sure duration is at least not None since the test case ran.
            test_case_data['duration'] = 0
          test_case_data['returncode'] = result
          if not test_cases:
            # It's the last test case. Eat all the remaining lines.
            eat_last_lines = True
            continue
          yield test_case_data
          test_case = None
          test_case_data = None
    else:
      accumulation += line

  # It's guaranteed here that the lines generator is exhausted.
  if eat_last_lines:
    yield test_case_data
    test_case = None
    test_case_data = None

  if test_case_data:
    # This means the last one likely crashed.
    test_case_data['crashed'] = True
    test_case_data['duration'] = 0
    test_case_data['returncode'] = 1
    test_case_data['output'] += accumulation
    yield test_case_data

  # If test_cases is not empty, these test cases were not run.
  for t in test_cases:
    yield {
      'test_case': t,
      'returncode': None,
      'duration': None,
      'output': None,
    }

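# Illustrative sketch of process_output(), using hypothetical gtest output.
# With test_cases == ['Foo.Bar'] and these input lines:
#
#   [ RUN      ] Foo.Bar
#   some test output
#   [       OK ] Foo.Bar (12 ms)
#
# it yields one dict shaped like:
#   {'test_case': 'Foo.Bar', 'returncode': 0, 'duration': 0.012,
#    'output': <all three lines>}
# A crashed test case instead gets returncode 1, duration 0 and
# 'crashed': True; test cases never seen in the output are yielded at the
# end with returncode and output set to None.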

def convert_to_lines(generator):
  """Turns input coming from a generator into lines.

  It is Windows-friendly.
  """
  accumulator = ''
  for data in generator:
    items = (accumulator + data).splitlines(True)
    for item in items[:-1]:
      yield item
    if items[-1].endswith(('\r', '\n')):
      yield items[-1]
      accumulator = ''
    else:
      accumulator = items[-1]
  if accumulator:
    yield accumulator

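# Illustrative sketch: convert_to_lines(iter(['a\nb', 'c\r\n', 'd'])) yields
# 'a\n', then 'bc\r\n', then 'd'. A partial line is buffered until its
# terminating LF or CRLF arrives, or until the generator is exhausted.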

class ResetableTimeout(object):
  """A resetable timeout that acts as a float.

  At each reset, the timeout is increased so that it still has the equivalent
  of the original timeout value, but according to 'now' at the time of the
  reset.
  """
  def __init__(self, timeout):
    assert timeout >= 0.
    self.timeout = float(timeout)
    self.last_reset = time.time()

  def reset(self):
    """Replenishes the timeout."""
    now = time.time()
    self.timeout += max(0., now - self.last_reset)
    self.last_reset = now
    return now

  @staticmethod
  def __nonzero__():
    # __nonzero__ is the Python 2 truth hook; always consider the timeout set.
    return True

  def __float__(self):
    """To be used as a timeout value for a function call."""
    return self.timeout

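# Illustrative sketch: a ResetableTimeout(10.) created at t=0 and reset at
# t=4 reads float() == 14.0 afterwards, i.e. the deadline is pushed back so a
# full 10 seconds remain from the moment of the reset. Popen.yield_any()
# re-evaluates float(timeout) on each iteration to pick up the updated value.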

class GoogleTestRunner(object):
  """Immutable settings to run many test cases in a loop."""
  def __init__(
      self,
      cmd,
      cwd_dir,
      timeout,
      progress,
      retries,
      decider,
      verbose,
      add_task,
      add_serial_task,
      filter_results):
    """Defines how to run a googletest executable.

    Arguments:
    - cmd: command line to start with.
    - cwd_dir: directory to start the app in.
    - timeout: timeout while waiting for output.
    - progress: object to present the user with status updates.
    - retries: number of allowed retries. For example if 2, the test case
      will be tried 3 times in total.
    - decider: object to decide if the run should be stopped early.
    - verbose: unconditionally prints output.
    - add_task: function to add the task back when failing, for retry.
    - add_serial_task: function to add the task back when failing too often,
      so it should be run serially.
    - filter_results: optional function to filter out extraneous test cases
      that ran without being requested.
    """
    self.cmd = cmd[:]
    self.cwd_dir = cwd_dir
    self.timeout = timeout
    self.progress = progress
    self.retries = retries
    self.decider = decider
    self.verbose = verbose
    self.add_task = add_task
    self.add_serial_task = add_serial_task
    self.filter_results = filter_results or (lambda x: x)
    # It is important to remove the shard environment variables since they
    # could conflict with --gtest_filter.
    self.env = setup_gtest_env()

  def map(self, priority, test_cases, try_count):
    """Runs a cluster of test cases and yields their results.

    try_count is 0 based, the original try is 0.
    """
    if self.decider.should_stop():
      raise StopIteration()
    cmd = self.cmd + ['--gtest_filter=%s' % ':'.join(test_cases)]
    if '--gtest_print_time' not in cmd:
      cmd.append('--gtest_print_time')
    proc = Popen(
        cmd,
        cwd=self.cwd_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        env=self.env)

    # Use an intelligent timeout that can be reset. The idea is simple, the
    # timeout is set to the value of the timeout for a single test case.
    # Every time a test case is parsed, the timeout is reset to its full
    # value. proc.yield_any() uses float() to extract the instantaneous value
    # of 'timeout'.
    timeout = ResetableTimeout(self.timeout)

    # Create a pipeline of generators.
    gen_lines = convert_to_lines(data for _, data in proc.yield_any(timeout))
    # It needs to be valid utf-8 otherwise it can't be stored.
    # TODO(maruel): Be more intelligent than decoding to ascii.
    gen_lines_utf8 = (
        line.decode('ascii', 'ignore').encode('utf-8') for line in gen_lines)
    gen_test_cases = process_output(gen_lines_utf8, test_cases)
    last_timestamp = proc.start
    got_failure_at_least_once = False
    results = []
    for i in self.filter_results(gen_test_cases):
      results.append(i)
      now = timeout.reset()
      test_case_has_passed = (i['returncode'] == 0)
      if i['duration'] is None:
        assert not test_case_has_passed
        # Do not notify self.decider, because an early crash in a large
        # cluster could cause the test to quit early.
      else:
        i['duration'] = max(i['duration'], now - last_timestamp)
        # A new test_case completed.
        self.decider.got_result(test_case_has_passed)

      need_to_retry = not test_case_has_passed and try_count < self.retries
      got_failure_at_least_once |= not test_case_has_passed
      last_timestamp = now

      # Create the line to print out.
      if i['duration'] is not None:
        duration = '(%.2fs)' % i['duration']
      else:
        duration = '<unknown>'
      if try_count:
        line = '%s %s - retry #%d' % (i['test_case'], duration, try_count)
      else:
        line = '%s %s' % (i['test_case'], duration)
      if self.verbose or not test_case_has_passed or try_count > 0:
        # Print output in one of three cases:
        # - --verbose was specified.
        # - The test failed.
        # - This wasn't the first attempt (this is needed so the test parser
        #   can detect that a test has been successfully retried).
        if i['output']:
          line += '\n' + i['output']
      self.progress.update_item(line, index=1, size=int(need_to_retry))

      if need_to_retry:
        priority = self._retry(priority, i['test_case'], try_count)

      # Delay yielding when only one test case is running, in case of a
      # crash-after-succeed.
      if len(test_cases) > 1:
        yield i

    if proc.returncode and not got_failure_at_least_once:
      if results and len(test_cases) == 1:
        # Crash after pass.
        results[-1]['returncode'] = proc.returncode

      if try_count < self.retries:
        # This is tricky, one of the test cases failed but each printed that
        # it succeeded! Retry them *all* individually.
        if not self.verbose and not try_count:
          # Print all the output as one shot when not verbose to be sure the
          # potential stack trace is printed.
          output = ''.join(i['output'] for i in results)
          self.progress.update_item(output, raw=True)
        for i in results:
          priority = self._retry(priority, i['test_case'], try_count)
          self.progress.update_item('', size=1)

    # Only yield once the process completed when there is only one test case
    # as a safety precaution.
    if results and len(test_cases) == 1:
      yield results[-1]

  def _retry(self, priority, test_case, try_count):
    """Adds back the same task again only if relevant.

    It may add it either at a lower (e.g. higher value) priority or at the
    end of the serially executed list.
    """
    if try_count + 1 < self.retries:
      # The test failed and needs to be retried normally.
      # Leave a buffer of ~40 test cases before retrying.
      priority += 40
      self.add_task(priority, self.map, priority, [test_case], try_count + 1)
    else:
      # This test only has one retry left, so the final retry should be
      # done serially.
      self.add_serial_task(
          priority, self.map, priority, [test_case], try_count + 1)
    return priority


class ChromiumGoogleTestRunner(GoogleTestRunner):
  def __init__(self, *args, **kwargs):
    super(ChromiumGoogleTestRunner, self).__init__(
        *args, filter_results=chromium_filter_pre_tests, **kwargs)


def get_test_cases(
    cmd, cwd, whitelist, blacklist, index, shards, seed, disabled, fails,
    flaky, manual):
  """Returns the filtered list of test cases.

  This is done synchronously.
  """
  try:
    # List all the test cases if a whitelist is used.
    tests = chromium_list_test_cases(
        cmd,
        cwd,
        index=index,
        shards=shards,
        seed=seed,
        disabled=disabled,
        fails=fails,
        flaky=flaky,
        pre=False,
        manual=manual)
  except Failure, e:
    print('Failed to list test cases. This means the test executable is so '
          'broken that it failed to start and enumerate its test cases.\n\n'
          'An example of a potential problem causing this is a Windows API '
          'function not available on this version of Windows.')
    print(e.args[0])
    return None

  if shards:
    # This is necessary for Swarm log parsing.
    print('Note: This is test shard %d of %d.' % (index+1, shards))

  # Filters the test cases with the two lists.
  if blacklist:
    tests = [
      t for t in tests if not any(fnmatch.fnmatch(t, s) for s in blacklist)
    ]
  if whitelist:
    tests = [
      t for t in tests if any(fnmatch.fnmatch(t, s) for s in whitelist)
    ]
  logging.info('Found %d test cases in %s', len(tests), ' '.join(cmd))
  return tests

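# Illustrative sketch of the wildcard filtering above, with hypothetical
# patterns: with whitelist == ['Foo.*'] and blacklist == ['*.FLAKY_*'],
# 'Foo.Bar' is kept while 'Baz.Bar' (not whitelisted) and 'Foo.FLAKY_Crash'
# (blacklisted) are dropped. The patterns use fnmatch(), i.e. the same
# wildcard style as --gtest_filter.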

def dump_results_as_json(result_file, results):
  """Writes the results out to a json file."""
  base_path = os.path.dirname(result_file)
  if base_path and not os.path.isdir(base_path):
    os.makedirs(base_path)
  with open(result_file, 'wb') as f:
    json.dump(results, f, sort_keys=True, indent=2)


def dump_results_as_xml(gtest_output, results, now):
  """Writes the results out to an xml file in google-test compatible format."""
  # TODO(maruel): Print all the test cases, including the ones that weren't
  # run and the retries.
  test_suites = {}
  for test_case, result in results['test_cases'].iteritems():
    suite, case = test_case.split('.', 1)
    test_suites.setdefault(suite, {})[case] = result[0]

  with open(gtest_output, 'wb') as f:
    # Sanity warning: hand-rolling XML. What could possibly go wrong?
    f.write('<?xml version="1.0" ?>\n')
    # TODO(maruel): Fill the fields nobody reads anyway.
    # disabled="%d" errors="%d" failures="%d"
    f.write(
        ('<testsuites name="AllTests" tests="%d" time="%f" timestamp="%s">\n')
        % (results['expected'], results['duration'], now))
    for suite_name, suite in test_suites.iteritems():
      # TODO(maruel): disabled="0" errors="0" failures="0" time="0"
      f.write('<testsuite name="%s" tests="%d">\n' % (suite_name, len(suite)))
      for case_name, case in suite.iteritems():
        if case['returncode'] == 0:
          f.write(
              ' <testcase classname="%s" name="%s" status="run" time="%f"/>\n'
              % (suite_name, case_name, case['duration']))
        else:
          f.write(
              ' <testcase classname="%s" name="%s" status="run" time="%f">\n'
              % (suite_name, case_name, (case['duration'] or 0)))
          # While at it, hand-roll CDATA escaping too.
          output = ']]><![CDATA['.join((case['output'] or '').split(']]>'))
          # TODO(maruel): message="" type=""
          f.write('<failure><![CDATA[%s]]></failure></testcase>\n' % output)
      f.write('</testsuite>\n')
    f.write('</testsuites>')


def append_gtest_output_to_xml(final_xml, filepath):
  """Combines the shard xml file with the final xml file."""
  try:
    with open(filepath) as shard_xml_file:
      shard_xml = minidom.parse(shard_xml_file)
  except xml.parsers.expat.ExpatError as e:
    logging.error('Failed to parse %s: %s', filepath, e)
    return final_xml
  except IOError as e:
    logging.error('Failed to load %s: %s', filepath, e)
    # If the shard crashed, gtest will not have generated an xml file.
    return final_xml

  if not final_xml:
    # Our final xml is empty, let's prepopulate it with the first one we see.
    return shard_xml

  final_testsuites_by_name = dict(
      (suite.getAttribute('name'), suite)
      for suite in final_xml.documentElement.getElementsByTagName('testsuite'))

  for testcase in shard_xml.documentElement.getElementsByTagName('testcase'):
    # Don't bother updating the final xml if there is no data.
    status = testcase.getAttribute('status')
    if status == 'notrun':
      continue

    name = testcase.getAttribute('name')
    # Look in our final xml to see if it's there.
    to_remove = []
    final_testsuite = final_testsuites_by_name[
        testcase.getAttribute('classname')]
    for final_testcase in final_testsuite.getElementsByTagName('testcase'):
      # Trim all the notrun testcase instances to add the new instance there.
      # This is to make sure it works properly in case of a testcase being
      # run multiple times.
      if (final_testcase.getAttribute('name') == name and
          final_testcase.getAttribute('status') == 'notrun'):
        to_remove.append(final_testcase)

    for item in to_remove:
      final_testsuite.removeChild(item)
    # Reparent the XML node.
    final_testsuite.appendChild(testcase)

  return final_xml


def running_serial_warning():
  return ['*****************************************************',
          '*****************************************************',
          '*****************************************************',
          'WARNING: The remaining tests are going to be retried',
          'serially. All tests should be isolated and be able to pass',
          'regardless of what else is running.',
          'If you see a test that can only pass serially, that test is',
          'probably broken and should be fixed.',
          '*****************************************************',
          '*****************************************************',
          '*****************************************************']


def gen_gtest_output_dir(cwd, gtest_output):
  """Converts gtest_output to an actual path that can be used in parallel.

  Returns a 'corrected' gtest_output value.
  """
  if not gtest_output.startswith('xml'):
    raise Failure('Can\'t parse --gtest_output=%s' % gtest_output)
  # Figure out the result filepath in case we can't parse it, it'd be
  # annoying to error out *after* running the tests.
  if gtest_output == 'xml':
    gtest_output = os.path.join(cwd, 'test_detail.xml')
  else:
    match = re.match(r'xml\:(.+)', gtest_output)
    if not match:
      raise Failure('Can\'t parse --gtest_output=%s' % gtest_output)
    # If match.group(1) is an absolute path, os.path.join() will do the right
    # thing.
    if match.group(1).endswith((os.path.sep, '/')):
      gtest_output = os.path.join(cwd, match.group(1), 'test_detail.xml')
    else:
      gtest_output = os.path.join(cwd, match.group(1))

  base_path = os.path.dirname(gtest_output)
  if base_path and not os.path.isdir(base_path):
    os.makedirs(base_path)

  # Emulate google-test's automatic increasing index number.
  while True:
    try:
      # Creates a file exclusively.
      os.close(os.open(gtest_output, os.O_CREAT|os.O_EXCL|os.O_RDWR, 0666))
      # It worked, we are done.
      return gtest_output
    except OSError:
      pass
    logging.debug('%s existed', gtest_output)
    base, ext = os.path.splitext(gtest_output)
    match = re.match(r'^(.+?_)(\d+)$', base)
    if match:
      base = match.group(1) + str(int(match.group(2)) + 1)
    else:
      base = base + '_0'
    gtest_output = base + ext

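# Illustrative sketch of the index emulation above: if test_detail.xml
# already exists, the names tried next are test_detail_0.xml, then
# test_detail_1.xml, test_detail_2.xml and so on, until a file can be created
# exclusively.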

def calc_cluster_default(num_test_cases, jobs):
  """Calculates a desired number of test cases per cluster, depending on the
  number of test cases and parallel jobs.
  """
  if not num_test_cases:
    return 0
  chunks = 6 * jobs
  if chunks >= num_test_cases:
    # Too many chunks, use 1~5 test cases per thread. Not enough to start
    # chunking.
    value = num_test_cases / jobs
  else:
    # Use chunks that are spread across threads.
    value = (num_test_cases + chunks - 1) / chunks
  # Limit to 10 test cases per cluster.
  return min(10, max(1, value))

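# Illustrative worked example of the cluster sizing above, with hypothetical
# inputs. With num_test_cases == 1000 and jobs == 8: chunks == 48 < 1000, so
# value == (1000 + 47) / 48 == 21 (integer division), clamped down to the cap
# of 10 test cases per cluster. With num_test_cases == 30 and jobs == 8:
# chunks == 48 >= 30, so value == 30 / 8 == 3 and clusters of 3 are used.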

def run_test_cases(
    cmd, cwd, test_cases, jobs, timeout, clusters, retries, run_all,
    max_failures, no_cr, gtest_output, result_file, verbose):
  """Runs test cases in parallel.

  Arguments:
  - cmd: command to run.
  - cwd: working directory.
  - test_cases: list of preprocessed test cases to run.
  - jobs: number of parallel execution threads to use.
  - timeout: individual test case timeout. Modulated when used with
    clustering.
  - clusters: number of test cases to lump together in a single execution. 0
    means the default automatic value which depends on len(test_cases) and
    jobs. Capped to len(test_cases) / jobs.
  - retries: number of times a test case can be retried.
  - run_all: if True, do not return early even if all test cases fail.
  - max_failures: the absolute maximum number of tolerated failures, or None.
  - no_cr: makes output friendly to piped logs.
  - gtest_output: saves results as xml.
  - result_file: saves results as json.
  - verbose: print more details.

  It may run a subset of the test cases if too many test cases failed, as
  determined with max_failures, retries and run_all.
  """
  assert 0 <= retries <= 100000
  if not test_cases:
    return 0
  if run_all:
    decider = RunAll()
  else:
    # If 10% of test cases fail, just too bad.
    decider = RunSome(len(test_cases), retries, 2, 0.1, max_failures)

  if not clusters:
    clusters = calc_cluster_default(len(test_cases), jobs)
  else:
    # Limit the value.
    clusters = max(min(clusters, len(test_cases) / jobs), 1)

  logging.debug('%d test cases with clusters of %d', len(test_cases), clusters)

  if gtest_output:
    gtest_output = gen_gtest_output_dir(cwd, gtest_output)
  columns = [('index', 0), ('size', len(test_cases))]
  progress = threading_utils.Progress(columns)
  progress.use_cr_only = not no_cr
  serial_tasks = threading_utils.QueueWithProgress(progress)

  def add_serial_task(priority, func, *args, **kwargs):
    """Adds a serial task, to be executed later."""
    assert isinstance(priority, int)
    assert callable(func)
    serial_tasks.put((priority, func, args, kwargs))

  with threading_utils.ThreadPoolWithProgress(
      progress, jobs, jobs, len(test_cases)) as pool:
    runner = ChromiumGoogleTestRunner(
        cmd,
        cwd,
        timeout,
        progress,
        retries,
        decider,
        verbose,
        pool.add_task,
        add_serial_task)
    function = runner.map
    # Cluster the test cases right away.
    for i in xrange((len(test_cases) + clusters - 1) / clusters):
      cluster = test_cases[i*clusters : (i+1)*clusters]
      pool.add_task(i, function, i, cluster, 0)
    results = pool.join()

    # Retry any failed tests serially.
    if not serial_tasks.empty():
      progress.update_item('\n'.join(running_serial_warning()), raw=True)
      progress.print_update()

      while not serial_tasks.empty():
        _priority, func, args, kwargs = serial_tasks.get()
        for out in func(*args, **kwargs):
          results.append(out)
        serial_tasks.task_done()
        progress.print_update()

      # Call join since that is a standard call once a queue has been emptied.
      serial_tasks.join()

  duration = time.time() - pool.tasks.progress.start

  cleaned = {}
  for i in results:
    cleaned.setdefault(i['test_case'], []).append(i)
  results = cleaned

  # Total time taken to run each test case.
  test_case_duration = dict(
      (test_case, sum((i.get('duration') or 0) for i in item))
      for test_case, item in results.iteritems())

  # Classify the results.
  success = []
  flaky = []
  fail = []
  nb_runs = 0
  for test_case in sorted(results):
    items = results[test_case]
    nb_runs += len(items)
    if not any(i['returncode'] == 0 for i in items):
      fail.append(test_case)
    elif len(items) > 1 and any(i['returncode'] == 0 for i in items):
      flaky.append(test_case)
    elif len(items) == 1 and items[0]['returncode'] == 0:
      success.append(test_case)
    else:
      # The test never ran.
      assert False, items
  missing = sorted(set(test_cases) - set(success) - set(flaky) - set(fail))

  saved = {
    'test_cases': results,
    'expected': len(test_cases),
    'success': success,
    'flaky': flaky,
    'fail': fail,
    'missing': missing,
    'duration': duration,
  }
  if result_file:
    dump_results_as_json(result_file, saved)
  if gtest_output:
    dump_results_as_xml(gtest_output, saved, datetime.datetime.now())
  sys.stdout.write('\n')
  if not results:
    return 1

  if flaky:
    print('Flaky tests:')
    for test_case in sorted(flaky):
      items = results[test_case]
      print('  %s (tried %d times)' % (test_case, len(items)))

  if fail:
    print('Failed tests:')
    for test_case in sorted(fail):
      print('  %s' % test_case)

  if not decider.should_stop() and missing:
    print('Missing tests:')
    for test_case in sorted(missing):
      print('  %s' % test_case)

  print('Summary:')
  if decider.should_stop():
    print('  ** STOPPED EARLY due to high failure rate **')
  output = [
    ('Success', success),
    ('Flaky', flaky),
    ('Fail', fail),
  ]
  if missing:
    output.append(('Missing', missing))
  total_expected = len(test_cases)
  for name, items in output:
    number = len(items)
    print(
        '  %7s: %4d %6.2f%% %7.2fs' % (
          name,
          number,
          number * 100. / total_expected,
          sum(test_case_duration.get(item, 0) for item in items)))
  print('  %.2fs Done running %d tests with %d executions. %.2f test/s' % (
      duration,
      len(results),
      nb_runs,
      nb_runs / duration if duration else 0))
  return int(bool(fail) or decider.stopped or bool(missing))


class OptionParserWithLogging(tools.OptionParserWithLogging):
  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self,
        log_file=os.environ.get('RUN_TEST_CASES_LOG_FILE', ''),
        **kwargs)


class OptionParserWithTestSharding(OptionParserWithLogging):
  """Adds automatic handling of test sharding."""
  def __init__(self, **kwargs):
    OptionParserWithLogging.__init__(self, **kwargs)

    def as_digit(variable, default):
      return int(variable) if variable.isdigit() else default

    group = optparse.OptionGroup(self, 'Which shard to select')
    group.add_option(
        '-I', '--index',
        type='int',
        default=as_digit(os.environ.get('GTEST_SHARD_INDEX', ''), None),
        help='Shard index to select')
    group.add_option(
        '-S', '--shards',
        type='int',
        default=as_digit(os.environ.get('GTEST_TOTAL_SHARDS', ''), None),
        help='Total number of shards to calculate from the --index to select')
    self.add_option_group(group)

  def parse_args(self, *args, **kwargs):
    options, args = OptionParserWithLogging.parse_args(self, *args, **kwargs)
    if bool(options.shards) != bool(options.index is not None):
      self.error('Use both --index X --shards Y or none of them')
    return options, args


class OptionParserWithTestShardingAndFiltering(OptionParserWithTestSharding):
  """Adds automatic handling of test sharding and filtering."""
  def __init__(self, *args, **kwargs):
    OptionParserWithTestSharding.__init__(self, *args, **kwargs)

    group = optparse.OptionGroup(self, 'Which test cases to select')
    group.add_option(
        '-w', '--whitelist',
        default=[],
        action='append',
        help='filter to apply to test cases to run, wildcard-style, defaults '
             'to all tests')
    group.add_option(
        '-b', '--blacklist',
        default=[],
        action='append',
        help='filter to apply to test cases to skip, wildcard-style, defaults '
             'to no tests')
    group.add_option(
        '-T', '--test-case-file',
        help='File containing the exact list of test cases to run')
    group.add_option(
        '--gtest_filter',
        default=os.environ.get('GTEST_FILTER', ''),
        help='Select test cases like google-test does, separated with ":"')
    group.add_option(
        '--seed',
        type='int',
        default=os.environ.get('GTEST_RANDOM_SEED', '1'),
        help='Deterministically shuffle the test list if non-0. default: '
             '%default')
    group.add_option(
        '-d', '--disabled',
        action='store_true',
        default=int(os.environ.get('GTEST_ALSO_RUN_DISABLED_TESTS', '0')),
        help='Include DISABLED_ tests')
    group.add_option(
        '--gtest_also_run_disabled_tests',
        action='store_true',
        dest='disabled',
        help='same as --disabled')
    self.add_option_group(group)

    group = optparse.OptionGroup(
        self, 'Which test cases to select; chromium-specific')
    group.add_option(
        '-f', '--fails',
        action='store_true',
        help='Include FAILS_ tests')
    group.add_option(
        '-F', '--flaky',
        action='store_true',
        help='Include FLAKY_ tests')
    group.add_option(
        '-m', '--manual',
        action='store_true',
        help='Include MANUAL_ tests')
    group.add_option(
        '--run-manual',
        action='store_true',
        dest='manual',
        help='same as --manual')
    self.add_option_group(group)

  def parse_args(self, *args, **kwargs):
    options, args = OptionParserWithTestSharding.parse_args(
        self, *args, **kwargs)

    if options.gtest_filter:
      # Override any other option.
      # Based on UnitTestOptions::FilterMatchesTest() in
      # http://code.google.com/p/googletest/source/browse/#svn%2Ftrunk%2Fsrc
      if '-' in options.gtest_filter:
        options.whitelist, options.blacklist = (
            options.gtest_filter.split('-', 1))
      else:
        options.whitelist = options.gtest_filter
        options.blacklist = ''
      options.whitelist = [i for i in options.whitelist.split(':') if i]
      options.blacklist = [i for i in options.blacklist.split(':') if i]

    return options, args

  @staticmethod
  def process_gtest_options(cmd, cwd, options):
    """Grabs the test cases."""
    if options.test_case_file:
      with open(options.test_case_file, 'r') as f:
        # Do not shuffle or alter the file in any way in that case except to
        # strip whitespaces.
        return [l for l in (l.strip() for l in f) if l]
    else:
      return get_test_cases(
          cmd,
          cwd,
          options.whitelist,
          options.blacklist,
          options.index,
          options.shards,
          options.seed,
          options.disabled,
          options.fails,
          options.flaky,
          options.manual)

class OptionParserTestCases(OptionParserWithTestShardingAndFiltering):
  def __init__(self, *args, **kwargs):
    OptionParserWithTestShardingAndFiltering.__init__(self, *args, **kwargs)
    self.add_option(
        '-j', '--jobs',
        type='int',
        default=threading_utils.num_processors(),
        help='Number of parallel jobs; default=%default')
    self.add_option(
        '--use-less-jobs',
        action='store_const',
        const=max(1, threading_utils.num_processors() / 2),
        dest='jobs',
        help='Starts fewer parallel jobs than the default, used to help '
             'reduce contention between threads if all the tests are very '
             'CPU heavy.')
    self.add_option(
        '-t', '--timeout',
        type='int',
        default=75,
        help='Timeout for a single test case, in seconds default:%default')
    self.add_option(
        '--clusters',
        type='int',
        help='Number of test cases to cluster together, clamped to '
             'len(test_cases) / jobs; the default is automatic')


def process_args(argv):
  parser = OptionParserTestCases(
      usage='%prog <options> [gtest]',
      verbose=int(os.environ.get('ISOLATE_DEBUG', 0)))
  parser.add_option(
      '--run-all',
      action='store_true',
      help='Do not fail early when a large number of test cases fail')
  parser.add_option(
      '--max-failures', type='int',
      help='Limit the number of failures before aborting')
  parser.add_option(
      '--retries', type='int', default=2,
      help='Number of times each test case should be retried in case of '
           'failure.')
  parser.add_option(
      '--no-dump',
      action='store_true',
      help='do not generate a .run_test_cases file')
  parser.add_option(
      '--no-cr',
      action='store_true',
      help='Use LF instead of CR for status progress')
  parser.add_option(
      '--result',
      help='Override the default name of the generated .run_test_cases file')

  group = optparse.OptionGroup(parser, 'google-test compatibility flags')
  group.add_option(
      '--gtest_list_tests',
      action='store_true',
      help='List all the test cases unformatted. Keeps compatibility with '
           'the executable itself.')
  group.add_option(
      '--gtest_output',
      default=os.environ.get('GTEST_OUTPUT', ''),
      help='XML output to generate')
  parser.add_option_group(group)

  options, args = parser.parse_args(argv)

  if not args:
    parser.error(
        'Please provide the executable line to run. If you need fancy things '
        'like xvfb, start this script from *inside* xvfb; it\'ll be much '
        'faster.')

  if options.run_all and options.max_failures is not None:
    parser.error('Use only one of --run-all or --max-failures')
  return parser, options, tools.fix_python_path(args)


def main(argv):
  """CLI frontend to validate arguments."""
  tools.disable_buffering()
  parser, options, cmd = process_args(argv)

  if options.gtest_list_tests:
    # Special case, return the output of the target unmodified.
    return subprocess.call(cmd + ['--gtest_list_tests'])

  cwd = os.getcwd()
  test_cases = parser.process_gtest_options(cmd, cwd, options)

  if options.no_dump:
    result_file = None
  else:
    result_file = options.result
    if not result_file:
      if cmd[0] == sys.executable:
        result_file = '%s.run_test_cases' % cmd[1]
      else:
        result_file = '%s.run_test_cases' % cmd[0]

  if not test_cases:
    # The fact of not running any test is considered a failure. This is to
    # prevent silent failure with an invalid --gtest_filter argument or
    # because of a misconfigured unit test.
    if test_cases is not None:
      print('Found no test to run')
    if result_file:
      dump_results_as_json(result_file, {
        'test_cases': [],
        'expected': 0,
        'success': [],
        'flaky': [],
        'fail': [],
        'missing': [],
        'duration': 0,
      })
    return 1

  if options.disabled:
    cmd.append('--gtest_also_run_disabled_tests')
  if options.manual:
    cmd.append('--run-manual')

  try:
    return run_test_cases(
        cmd,
        cwd,
        test_cases,
        options.jobs,
        options.timeout,
        options.clusters,
        options.retries,
        options.run_all,
        options.max_failures,
        options.no_cr,
        options.gtest_output,
        result_file,
        options.verbose)
  except Failure as e:
    print >> sys.stderr, e.args[0]
    return 1


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))