| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Runs each test cases as a single shard, single process execution. | |
| 7 | |
| 8 Similar to sharding_supervisor.py but finer grained. It runs each test case | |
| 9 individually instead of running per shard. Runs multiple instances in parallel. | |
| 10 """ | |
| 11 | |
| 12 import datetime | |
| 13 import fnmatch | |
| 14 import json | |
| 15 import logging | |
| 16 import optparse | |
| 17 import os | |
| 18 import random | |
| 19 import re | |
| 20 import subprocess | |
| 21 import sys | |
| 22 import threading | |
| 23 import time | |
| 24 from xml.dom import minidom | |
| 25 import xml.parsers.expat | |
| 26 | |
| 27 # Directory with this file. | |
| 28 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | |
| 29 # Root of a repository. | |
| 30 ROOT_DIR = os.path.dirname(BASE_DIR) | |
| 31 # Name of the optional package with all dependencies. | |
| 32 DEPENDENCIES_ZIP = os.path.join(BASE_DIR, 'run_isolated.zip') | |
| 33 | |
| 34 # When running in an isolated environment, the dependencies are in the zipped package. | |
| 35 if os.path.exists(DEPENDENCIES_ZIP): | |
| 36 sys.path.insert(0, DEPENDENCIES_ZIP) | |
| 37 else: | |
| 38 # Otherwise it is in the root of the repository. | |
| 39 if not ROOT_DIR in sys.path: | |
| 40 sys.path.insert(0, ROOT_DIR) | |
| 41 | |
| 42 | |
| 43 from utils import threading_utils | |
| 44 from utils import tools | |
| 45 | |
| 46 | |
| 47 # These are known to influence the way the output is generated. | |
| 48 KNOWN_GTEST_ENV_VARS = [ | |
| 49 'GTEST_ALSO_RUN_DISABLED_TESTS', | |
| 50 'GTEST_BREAK_ON_FAILURE', | |
| 51 'GTEST_CATCH_EXCEPTIONS', | |
| 52 'GTEST_COLOR', | |
| 53 'GTEST_FILTER', | |
| 54 'GTEST_OUTPUT', | |
| 55 'GTEST_PRINT_TIME', | |
| 56 'GTEST_RANDOM_SEED', | |
| 57 'GTEST_REPEAT', | |
| 58 'GTEST_SHARD_INDEX', | |
| 59 'GTEST_SHARD_STATUS_FILE', | |
| 60 'GTEST_SHUFFLE', | |
| 61 'GTEST_THROW_ON_FAILURE', | |
| 62 'GTEST_TOTAL_SHARDS', | |
| 63 ] | |
| 64 | |
| 65 # These need to be popped out before running a test. | |
| 66 GTEST_ENV_VARS_TO_REMOVE = [ | |
| 67 'GTEST_ALSO_RUN_DISABLED_TESTS', | |
| 68 'GTEST_FILTER', | |
| 69 'GTEST_OUTPUT', | |
| 70 'GTEST_RANDOM_SEED', | |
| 71 # TODO(maruel): Handle. | |
| 72 'GTEST_REPEAT', | |
| 73 'GTEST_SHARD_INDEX', | |
| 74 # TODO(maruel): Handle. | |
| 75 'GTEST_SHUFFLE', | |
| 76 'GTEST_TOTAL_SHARDS', | |
| 77 ] | |
| 78 | |
| 79 | |
| 80 RUN_PREFIX = '[ RUN ] ' | |
| 81 OK_PREFIX = '[ OK ] ' | |
| 82 FAILED_PREFIX = '[ FAILED ] ' | |
| 83 | |
| 84 | |
| 85 if subprocess.mswindows: | |
| 86 import msvcrt # pylint: disable=F0401 | |
| 87 from ctypes import wintypes | |
| 88 from ctypes import windll | |
| 89 | |
| 90 def ReadFile(handle, desired_bytes): | |
| 91 """Calls kernel32.ReadFile().""" | |
| 92 c_read = wintypes.DWORD() | |
| 93 buff = wintypes.create_string_buffer(desired_bytes+1) | |
| 94 windll.kernel32.ReadFile( | |
| 95 handle, buff, desired_bytes, wintypes.byref(c_read), None) | |
| 96 # NULL terminate it. | |
| 97 buff[c_read.value] = '\x00' | |
| 98 return wintypes.GetLastError(), buff.value | |
| 99 | |
| 100 def PeekNamedPipe(handle): | |
| 101 """Calls kernel32.PeekNamedPipe(). Simplified version.""" | |
| 102 c_avail = wintypes.DWORD() | |
| 103 c_message = wintypes.DWORD() | |
| 104 success = windll.kernel32.PeekNamedPipe( | |
| 105 handle, None, 0, None, wintypes.byref(c_avail), | |
| 106 wintypes.byref(c_message)) | |
| 107 if not success: | |
| 108 raise OSError(wintypes.GetLastError()) | |
| 109 return c_avail.value | |
| 110 | |
| 111 def recv_multi_impl(conns, maxsize, timeout): | |
| 112 """Reads from the first available pipe. | |
| 113 | |
| 114 If timeout is None, it's blocking. If timeout is 0, it is not blocking. | |
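| Returns a (pipe index, data) tuple, or (None, None) on timeout or when every | |
| pipe has been closed. | |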
| 115 """ | |
| 116 # TODO(maruel): Use WaitForMultipleObjects(). Python creates anonymous pipes | |
| 117 # for proc.stdout and proc.stderr but they are implemented as named pipes on | |
| 118 # Windows. Since named pipes are not waitable object, they can't be passed | |
| 119 # as-is to WFMO(). So this means N times CreateEvent(), N times ReadFile() | |
| 120 # and finally WFMO(). This requires caching the events handles in the Popen | |
| 121 # object and remembering the pending ReadFile() calls. This will require | |
| 122 # some re-architecture. | |
| 123 maxsize = max(maxsize or 16384, 1) | |
| 124 if timeout: | |
| 125 start = time.time() | |
| 126 handles = [msvcrt.get_osfhandle(conn.fileno()) for conn in conns] | |
| 127 while handles: | |
| 128 for i, handle in enumerate(handles): | |
| 129 try: | |
| 130 avail = min(PeekNamedPipe(handle), maxsize) | |
| 131 if avail: | |
| 132 return i, ReadFile(handle, avail)[1] | |
| 133 if (timeout and (time.time() - start) >= timeout) or timeout == 0: | |
| 134 return None, None | |
| 135 # Polling rocks. | |
| 136 time.sleep(0.001) | |
| 137 except OSError: | |
| 138 handles.pop(i) | |
| 139 break | |
| 140 # Nothing to wait for. | |
| 141 return None, None | |
| 142 | |
| 143 else: | |
| 144 import fcntl # pylint: disable=F0401 | |
| 145 import select | |
| 146 | |
| 147 def recv_multi_impl(conns, maxsize, timeout): | |
| 148 """Reads from the first available pipe. | |
| 149 | |
| 150 If timeout is None, it's blocking. If timeout is 0, it is not blocking. | |
| 151 """ | |
| 152 try: | |
| 153 r, _, _ = select.select(conns, [], [], timeout) | |
| 154 except select.error: | |
| 155 return None, None | |
| 156 if not r: | |
| 157 return None, None | |
| 158 | |
| 159 conn = r[0] | |
| 160 # Temporarily make it non-blocking. | |
| 161 flags = fcntl.fcntl(conn, fcntl.F_GETFL) | |
| 162 if not conn.closed: | |
| 163 # pylint: disable=E1101 | |
| 164 fcntl.fcntl(conn, fcntl.F_SETFL, flags | os.O_NONBLOCK) | |
| 165 try: | |
| 166 data = conn.read(max(maxsize or 16384, 1)) | |
| 167 return conns.index(conn), data | |
| 168 finally: | |
| 169 if not conn.closed: | |
| 170 fcntl.fcntl(conn, fcntl.F_SETFL, flags) | |
| 171 | |
| 172 | |
| 173 class Failure(Exception): | |
| 174 pass | |
| 175 | |
| 176 | |
| 177 class Popen(subprocess.Popen): | |
| 178 """Adds timeout support on stdout and stderr. | |
| 179 | |
| 180 Inspired by | |
| 181 http://code.activestate.com/recipes/440554-module-to-allow-asynchronous-subprocess-use-on-win/ | |
| 182 """ | |
| 183 def __init__(self, *args, **kwargs): | |
| 184 self.start = time.time() | |
| 185 self.end = None | |
| 186 super(Popen, self).__init__(*args, **kwargs) | |
| 187 | |
| 188 def duration(self): | |
| 189 """Duration of the child process. | |
| 190 | |
| 191 It is greater than or equal to the actual time the child process ran. It can be | |
| 192 significantly higher than the real value if neither .wait() nor .poll() was | |
| 193 used. | |
| 194 """ | |
| 195 return (self.end or time.time()) - self.start | |
| 196 | |
| 197 def wait(self): | |
| 198 ret = super(Popen, self).wait() | |
| 199 if not self.end: | |
| 200 # communicate() uses wait() internally. | |
| 201 self.end = time.time() | |
| 202 return ret | |
| 203 | |
| 204 def poll(self): | |
| 205 ret = super(Popen, self).poll() | |
| 206 if ret is not None and not self.end: | |
| 207 self.end = time.time() | |
| 208 return ret | |
| 209 | |
| 210 def yield_any(self, timeout=None): | |
| 211 """Yields output until the process terminates or is killed by a timeout. | |
| 212 | |
| 213 Yielded values are in the form (pipename, data). | |
| 214 | |
| 215 If timeout is None, it is blocking. If timeout is 0, it doesn't block. It is | |
| 216 generally not useful to use timeout=0. | |
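| Typical usage: "for pipename, data in proc.yield_any(timeout): ...", as done | |
| in call_with_timeout() below. | |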
| 217 """ | |
| 218 remaining = 0 | |
| 219 while self.poll() is None: | |
| 220 if timeout: | |
| 221 # While these float() calls seem redundant, they are to force | |
| 222 # ResetableTimeout to "render" itself into a float. At each call, the | |
| 223 # resulting value could be different, depending on whether a .reset() call | |
| 224 # occurred. | |
| 225 remaining = max(float(timeout) - self.duration(), 0.001) | |
| 226 else: | |
| 227 remaining = timeout | |
| 228 t, data = self.recv_any(timeout=remaining) | |
| 229 if data or timeout == 0: | |
| 230 yield (t, data) | |
| 231 if timeout and self.duration() >= float(timeout): | |
| 232 break | |
| 233 if self.poll() is None and timeout and self.duration() >= float(timeout): | |
| 234 logging.debug('Kill %s %s', self.duration(), float(timeout)) | |
| 235 self.kill() | |
| 236 self.wait() | |
| 237 # Read all remaining output in the pipes. | |
| 238 while True: | |
| 239 t, data = self.recv_any() | |
| 240 if not data: | |
| 241 break | |
| 242 yield (t, data) | |
| 243 | |
| 244 def recv_any(self, maxsize=None, timeout=None): | |
| 245 """Reads from stderr and if empty, from stdout. | |
| 246 | |
| 247 If timeout is None, it is blocking. If timeout is 0, it doesn't block. | |
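| Returns a (pipename, data) tuple such as ('stdout', <bytes>), or (None, None) | |
| when no output is available. | |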
| 248 """ | |
| 249 pipes = [ | |
| 250 x for x in ((self.stderr, 'stderr'), (self.stdout, 'stdout')) if x[0] | |
| 251 ] | |
| 252 if len(pipes) == 2 and self.stderr.fileno() == self.stdout.fileno(): | |
| 253 pipes.pop(0) | |
| 254 if not pipes: | |
| 255 return None, None | |
| 256 conns, names = zip(*pipes) | |
| 257 index, data = recv_multi_impl(conns, maxsize, timeout) | |
| 258 if index is None: | |
| 259 return index, data | |
| 260 if not data: | |
| 261 self._close(names[index]) | |
| 262 return None, None | |
| 263 if self.universal_newlines: | |
| 264 data = self._translate_newlines(data) | |
| 265 return names[index], data | |
| 266 | |
| 267 def recv_out(self, maxsize=None, timeout=None): | |
| 268 """Reads from stdout asynchronously.""" | |
| 269 return self._recv('stdout', maxsize, timeout) | |
| 270 | |
| 271 def recv_err(self, maxsize=None, timeout=None): | |
| 272 """Reads from stderr asynchronously.""" | |
| 273 return self._recv('stderr', maxsize, timeout) | |
| 274 | |
| 275 def _close(self, which): | |
| 276 getattr(self, which).close() | |
| 277 setattr(self, which, None) | |
| 278 | |
| 279 def _recv(self, which, maxsize, timeout): | |
| 280 conn = getattr(self, which) | |
| 281 if conn is None: | |
| 282 return None | |
| 283 _, data = recv_multi_impl([conn], maxsize, timeout) | |
| 284 if not data: | |
| 285 return self._close(which) | |
| 286 if self.universal_newlines: | |
| 287 data = self._translate_newlines(data) | |
| 288 return data | |
| 289 | |
| 290 | |
| 291 def call_with_timeout(cmd, timeout, **kwargs): | |
| 292 """Runs an executable with an optional timeout. | |
| 293 | |
| 294 timeout 0 or None disables the timeout. | |
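| Returns a (stdout, stderr, returncode, duration) tuple. | |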
| 295 """ | |
| 296 proc = Popen( | |
| 297 cmd, | |
| 298 stdin=subprocess.PIPE, | |
| 299 stdout=subprocess.PIPE, | |
| 300 **kwargs) | |
| 301 if timeout: | |
| 302 out = '' | |
| 303 err = '' | |
| 304 for t, data in proc.yield_any(timeout): | |
| 305 if t == 'stdout': | |
| 306 out += data | |
| 307 else: | |
| 308 err += data | |
| 309 else: | |
| 310 # This code path is much faster. | |
| 311 out, err = proc.communicate() | |
| 312 return out, err, proc.returncode, proc.duration() | |
| 313 | |
| 314 | |
| 315 def setup_gtest_env(): | |
| 316 """Copy the enviroment variables and setup for running a gtest.""" | |
| 317 env = os.environ.copy() | |
| 318 for name in GTEST_ENV_VARS_TO_REMOVE: | |
| 319 env.pop(name, None) | |
| 320 | |
| 321 # Forcibly enable color by default, if not already disabled. | |
| 322 env.setdefault('GTEST_COLOR', 'on') | |
| 323 | |
| 324 return env | |
| 325 | |
| 326 | |
| 327 def gtest_list_tests(cmd, cwd): | |
| 328 """List all the test cases for a google test. | |
| 329 | |
| 330 See more info at http://code.google.com/p/googletest/. | |
| 331 """ | |
| 332 cmd = cmd[:] | |
| 333 cmd.append('--gtest_list_tests') | |
| 334 env = setup_gtest_env() | |
| 335 timeout = 0. | |
| 336 try: | |
| 337 out, err, returncode, _ = call_with_timeout( | |
| 338 cmd, | |
| 339 timeout, | |
| 340 stderr=subprocess.PIPE, | |
| 341 env=env, | |
| 342 cwd=cwd) | |
| 343 except OSError, e: | |
| 344 raise Failure('Failed to run %s\ncwd=%s\n%s' % (' '.join(cmd), cwd, str(e))) | |
| 345 if returncode: | |
| 346 raise Failure( | |
| 347 'Failed to run %s\nstdout:\n%s\nstderr:\n%s' % | |
| 348 (' '.join(cmd), out, err), returncode) | |
| 349 # pylint: disable=E1103 | |
| 350 if err and not err.startswith('Xlib: extension "RANDR" missing on display '): | |
| 351 logging.error('Unexpected spew in gtest_list_tests:\n%s\n%s', err, cmd) | |
| 352 return out | |
| 353 | |
| 354 | |
| 355 def filter_shards(tests, index, shards): | |
| 356 """Filters the shards. | |
| 357 | |
| 358 Watch out for integer-based arithmetic. | |
| 359 """ | |
| 360 # The following code could be made more terse but I liked the extra clarity. | |
| 361 assert 0 <= index < shards | |
| 362 total = len(tests) | |
| 363 quotient, remainder = divmod(total, shards) | |
| 364 # 1 item of each remainder is distributed over the first 0:remainder shards. | |
| 365 # For example, with total == 5, index == 1, shards == 3 | |
| 366 # min_bound == 2, max_bound == 4. | |
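| # That is, shard 0 gets tests[0:2], shard 1 gets tests[2:4] and shard 2 gets | |
| # tests[4:5]. | |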
| 367 min_bound = quotient * index + min(index, remainder) | |
| 368 max_bound = quotient * (index + 1) + min(index + 1, remainder) | |
| 369 return tests[min_bound:max_bound] | |
| 370 | |
| 371 | |
| 372 def _starts_with(a, b, prefix): | |
| 373 return a.startswith(prefix) or b.startswith(prefix) | |
| 374 | |
| 375 | |
| 376 def is_valid_test_case(test, disabled): | |
| 377 """Returns False on malformed or DISABLED_ test cases.""" | |
| 378 if not '.' in test: | |
| 379 logging.error('Ignoring unknown test %s', test) | |
| 380 return False | |
| 381 fixture, case = test.split('.', 1) | |
| 382 if not disabled and _starts_with(fixture, case, 'DISABLED_'): | |
| 383 return False | |
| 384 return True | |
| 385 | |
| 386 | |
| 387 def filter_bad_tests(tests, disabled): | |
| 388 """Filters out malformed or DISABLED_ test cases.""" | |
| 389 return [test for test in tests if is_valid_test_case(test, disabled)] | |
| 390 | |
| 391 | |
| 392 def chromium_is_valid_test_case(test, disabled, fails, flaky, pre, manual): | |
| 393 """Return False on chromium specific bad tests in addition to | |
| 394 is_valid_test_case(). | |
| 395 | |
| 396 FAILS_, FLAKY_, PRE_, MANUAL_ and other weird Chromium-specific test cases. | |
| 397 """ | |
| 398 if not is_valid_test_case(test, disabled): | |
| 399 return False | |
| 400 fixture, case = test.split('.', 1) | |
| 401 if not fails and _starts_with(fixture, case, 'FAILS_'): | |
| 402 return False | |
| 403 if not flaky and _starts_with(fixture, case, 'FLAKY_'): | |
| 404 return False | |
| 405 if not pre and _starts_with(fixture, case, 'PRE_'): | |
| 406 return False | |
| 407 if not manual and _starts_with(fixture, case, 'MANUAL_'): | |
| 408 return False | |
| 409 if test == 'InProcessBrowserTest.Empty': | |
| 410 return False | |
| 411 return True | |
| 412 | |
| 413 | |
| 414 def chromium_filter_bad_tests(tests, disabled, fails, flaky, pre, manual): | |
| 415 """Filters out chromium specific bad tests in addition to filter_bad_tests(). | |
| 416 | |
| 417 Filters out FAILS_, FLAKY_, PRE_, MANUAL_ and other weird Chromium-specific | |
| 418 test cases. | |
| 419 """ | |
| 420 return [ | |
| 421 test for test in tests if chromium_is_valid_test_case( | |
| 422 test, disabled, fails, flaky, pre, manual) | |
| 423 ] | |
| 424 | |
| 425 | |
| 426 def chromium_filter_pre_tests(test_case_results): | |
| 427 """Filters out PRE_ test case results.""" | |
| 428 return ( | |
| 429 i for i in test_case_results if chromium_is_valid_test_case( | |
| 430 i['test_case'], | |
| 431 disabled=True, | |
| 432 fails=True, | |
| 433 flaky=True, | |
| 434 pre=False, | |
| 435 manual=True)) | |
| 436 | |
| 437 | |
| 438 def parse_gtest_cases(out, seed): | |
| 439 """Returns the flattened list of test cases in the executable. | |
| 440 | |
| 441 The returned list is sorted so it is not dependent on the order of the linked | |
| 442 objects. Then |seed| is applied to deterministically shuffle the list if | |
| 443 |seed| is a positive value. The rationale is that the probability of two test | |
| 444 cases stomping on each other when run simultaneously is high for test cases in | |
| 445 the same fixture. By shuffling the tests, the probability of these badly | |
| 446 written tests running simultaneously, let alone being in the same shard, is | |
| 447 lower. | |
| 448 | |
| 449 Expected format is a concatenation of this: | |
| 450 TestFixture1 | |
| 451 TestCase1 | |
| 452 TestCase2 | |
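| which is flattened to ['TestFixture1.TestCase1', 'TestFixture1.TestCase2']. | |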
| 453 """ | |
| 454 tests = [] | |
| 455 fixture = None | |
| 456 lines = out.splitlines() | |
| 457 while lines: | |
| 458 line = lines.pop(0) | |
| 459 if not line: | |
| 460 break | |
| 461 if not line.startswith(' '): | |
| 462 fixture = line | |
| 463 else: | |
| 464 case = line[2:] | |
| 465 if case.startswith('YOU HAVE'): | |
| 466 # It's a 'YOU HAVE foo bar' line. We're done. | |
| 467 break | |
| 468 assert ' ' not in case | |
| 469 tests.append(fixture + case) | |
| 470 tests = sorted(tests) | |
| 471 if seed: | |
| 472 # Sadly, python's random module doesn't permit local seeds. | |
| 473 state = random.getstate() | |
| 474 try: | |
| 475 # This is totally deterministic. | |
| 476 random.seed(seed) | |
| 477 random.shuffle(tests) | |
| 478 finally: | |
| 479 random.setstate(state) | |
| 480 return tests | |
| 481 | |
| 482 | |
| 483 def list_test_cases(cmd, cwd, index, shards, seed, disabled): | |
| 484 """Returns the list of test cases according to the specified criterias.""" | |
| 485 tests = parse_gtest_cases(gtest_list_tests(cmd, cwd), seed) | |
| 486 | |
| 487 # TODO(maruel): Splitting shards before filtering bad test cases could result | |
| 488 # in unbalanced shards. | |
| 489 if shards: | |
| 490 tests = filter_shards(tests, index, shards) | |
| 491 return filter_bad_tests(tests, disabled) | |
| 492 | |
| 493 | |
| 494 def chromium_list_test_cases( | |
| 495 cmd, cwd, index, shards, seed, disabled, fails, flaky, pre, manual): | |
| 496 """Returns the list of test cases according to the specified criterias.""" | |
| 497 tests = list_test_cases(cmd, cwd, index, shards, seed, disabled) | |
| 498 return chromium_filter_bad_tests(tests, disabled, fails, flaky, pre, manual) | |
| 499 | |
| 500 | |
| 501 class RunSome(object): | |
| 502 """Thread-safe object deciding if testing should continue.""" | |
| 503 def __init__( | |
| 504 self, expected_count, retries, min_failures, max_failure_ratio, | |
| 505 max_failures): | |
| 506 """Determines if it is better to give up testing after an amount of failures | |
| 507 and successes. | |
| 508 | |
| 509 Arguments: | |
| 510 - expected_count is the expected number of elements to run. | |
| 511 - retries is how many times a failing element can be retried. retries should | |
| 512 be set to the maximum number of retries per failure. This permits | |
| 513 dampening the curve to determine threshold where to stop. | |
| 514 - min_failures is the minimal number of failures to tolerate, to put a lower | |
| 515 limit when expected_count is small. This value is multiplied by the number | |
| 516 of retries. | |
| 517 - max_failure_ratio is the ratio of permitted failures, e.g. 0.1 to stop | |
| 518 after 10% of failed test cases. | |
| 519 - max_failures is the absolute maximum number of tolerated failures or None. | |
| 520 | |
| 521 For large values of expected_count, the number of tolerated failures will be | |
| 522 at maximum "(expected_count * retries) * max_failure_ratio". | |
| 523 | |
| 524 For small values of expected_count, the number of tolerated failures will be | |
| 525 at least "min_failures * retries". | |
| 526 """ | |
| 527 assert 0 < expected_count | |
| 528 assert 0 <= retries < 100 | |
| 529 assert 0 <= min_failures | |
| 530 assert 0. < max_failure_ratio < 1. | |
| 531 # Constants. | |
| 532 self._expected_count = expected_count | |
| 533 self._retries = retries | |
| 534 self._min_failures = min_failures | |
| 535 self._max_failure_ratio = max_failure_ratio | |
| 536 | |
| 537 self._min_failures_tolerated = self._min_failures * (self._retries + 1) | |
| 538 # Pre-calculate the maximum number of allowable failures. Note that | |
| 539 # _max_failures can be lower than _min_failures. | |
| 540 self._max_failures_tolerated = round( | |
| 541 (expected_count * (retries + 1)) * max_failure_ratio) | |
| 542 if max_failures is not None: | |
| 543 # Override the ratio if necessary. | |
| 544 self._max_failures_tolerated = min( | |
| 545 self._max_failures_tolerated, max_failures) | |
| 546 self._min_failures_tolerated = min( | |
| 547 self._min_failures_tolerated, max_failures) | |
| 548 | |
| 549 # Variables. | |
| 550 self._lock = threading.Lock() | |
| 551 self._passed = 0 | |
| 552 self._failures = 0 | |
| 553 self.stopped = False | |
| 554 | |
| 555 def should_stop(self): | |
| 556 """Stops once a threshold was reached. This includes retries.""" | |
| 557 with self._lock: | |
| 558 if self.stopped: | |
| 559 return True | |
| 560 # Accept at least the minimum number of failures. | |
| 561 if self._failures <= self._min_failures_tolerated: | |
| 562 return False | |
| 563 if self._failures >= self._max_failures_tolerated: | |
| 564 self.stopped = True | |
| 565 return self.stopped | |
| 566 | |
| 567 def got_result(self, passed): | |
| 568 with self._lock: | |
| 569 if passed: | |
| 570 self._passed += 1 | |
| 571 else: | |
| 572 self._failures += 1 | |
| 573 | |
| 574 def __str__(self): | |
| 575 return '%s(%d, %d, %d, %.3f)' % ( | |
| 576 self.__class__.__name__, | |
| 577 self._expected_count, | |
| 578 self._retries, | |
| 579 self._min_failures, | |
| 580 self._max_failure_ratio) | |
| 581 | |
| 582 | |
| 583 class RunAll(object): | |
| 584 """Never fails.""" | |
| 585 stopped = False | |
| 586 | |
| 587 @staticmethod | |
| 588 def should_stop(): | |
| 589 return False | |
| 590 | |
| 591 @staticmethod | |
| 592 def got_result(_): | |
| 593 pass | |
| 594 | |
| 595 | |
| 596 def process_output(lines, test_cases): | |
| 597 """Yield the data of each test cases. | |
| 598 | |
| 599 Expects the test cases to be run in the order of the list. | |
| 600 | |
| 601 Handles the following google-test behavior: | |
| 602 - A test case crash causing only a subset of the test cases to be run. | |
| 603 - An invalid test case name, so the test case wasn't run at all. | |
| 604 | |
| 605 This function automatically distributes the startup cost across each test case. | |
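| Each yielded item is a dict with the keys 'test_case', 'returncode', 'duration' | |
| and 'output' (plus 'crashed' when the test case did not complete). | |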
| 606 """ | |
| 607 test_cases = test_cases[:] | |
| 608 test_case = None | |
| 609 test_case_data = None | |
| 610 # Accumulates the junk between test cases. | |
| 611 accumulation = '' | |
| 612 eat_last_lines = False | |
| 613 | |
| 614 for line in lines: | |
| 615 if eat_last_lines: | |
| 616 test_case_data['output'] += line | |
| 617 continue | |
| 618 | |
| 619 i = line.find(RUN_PREFIX) | |
| 620 if i > 0 and test_case_data: | |
| 621 # This may occur specifically in browser_tests, because the test case is | |
| 622 # run in a child process. If the child process doesn't terminate its | |
| 623 # output with a LF, it may cause the "[ RUN ]" line to be improperly | |
| 624 # printed out in the middle of a line. | |
| 625 test_case_data['output'] += line[:i] | |
| 626 line = line[i:] | |
| 627 i = 0 | |
| 628 if i >= 0: | |
| 629 if test_case: | |
| 630 # The previous test case had crashed. No idea about its duration. | |
| 631 test_case_data['returncode'] = 1 | |
| 632 test_case_data['duration'] = 0 | |
| 633 test_case_data['crashed'] = True | |
| 634 yield test_case_data | |
| 635 | |
| 636 test_case = line[len(RUN_PREFIX):].strip().split(' ', 1)[0] | |
| 637 # Accept the test case even if it was unexpected. | |
| 638 if test_case in test_cases: | |
| 639 test_cases.remove(test_case) | |
| 640 else: | |
| 641 logging.warning('Unexpected test case: %s', test_case) | |
| 642 test_case_data = { | |
| 643 'test_case': test_case, | |
| 644 'returncode': None, | |
| 645 'duration': None, | |
| 646 'output': accumulation + line, | |
| 647 } | |
| 648 accumulation = '' | |
| 649 | |
| 650 elif test_case: | |
| 651 test_case_data['output'] += line | |
| 652 i = line.find(OK_PREFIX) | |
| 653 if i >= 0: | |
| 654 result = 0 | |
| 655 line = line[i + len(OK_PREFIX):] | |
| 656 else: | |
| 657 i = line.find(FAILED_PREFIX) | |
| 658 if i >= 0: | |
| 659 line = line[i + len(FAILED_PREFIX):] | |
| 660 result = 1 | |
| 661 if i >= 0: | |
| 662 # The test completed. It's important to make sure the test case name | |
| 663 # matches too, since it could be a fake output. | |
| 664 if line.startswith(test_case): | |
| 665 line = line[len(test_case):] | |
| 666 match = re.search(r' \((\d+) ms\)', line) | |
| 667 if match: | |
| 668 test_case_data['duration'] = float(match.group(1)) / 1000. | |
| 669 else: | |
| 670 # Make sure duration is at least not None since the test case ran. | |
| 671 test_case_data['duration'] = 0 | |
| 672 test_case_data['returncode'] = result | |
| 673 if not test_cases: | |
| 674 # It's the last test case. Eat all the remaining lines. | |
| 675 eat_last_lines = True | |
| 676 continue | |
| 677 yield test_case_data | |
| 678 test_case = None | |
| 679 test_case_data = None | |
| 680 else: | |
| 681 accumulation += line | |
| 682 | |
| 683 # It's guaranteed here that the lines generator is exhausted. | |
| 684 if eat_last_lines: | |
| 685 yield test_case_data | |
| 686 test_case = None | |
| 687 test_case_data = None | |
| 688 | |
| 689 if test_case_data: | |
| 690 # This means the last one likely crashed. | |
| 691 test_case_data['crashed'] = True | |
| 692 test_case_data['duration'] = 0 | |
| 693 test_case_data['returncode'] = 1 | |
| 694 test_case_data['output'] += accumulation | |
| 695 yield test_case_data | |
| 696 | |
| 697 # If test_cases is not empty, these test cases were not run. | |
| 698 for t in test_cases: | |
| 699 yield { | |
| 700 'test_case': t, | |
| 701 'returncode': None, | |
| 702 'duration': None, | |
| 703 'output': None, | |
| 704 } | |
| 705 | |
| 706 | |
| 707 def convert_to_lines(generator): | |
| 708 """Turn input coming from a generator into lines. | |
| 709 | |
| 710 It is Windows-friendly. | |
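| For example, the chunks 'a\r\nb' and 'c\n' are yielded as 'a\r\n' then 'bc\n'. | |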
| 711 """ | |
| 712 accumulator = '' | |
| 713 for data in generator: | |
| 714 items = (accumulator + data).splitlines(True) | |
| 715 for item in items[:-1]: | |
| 716 yield item | |
| 717 if items[-1].endswith(('\r', '\n')): | |
| 718 yield items[-1] | |
| 719 accumulator = '' | |
| 720 else: | |
| 721 accumulator = items[-1] | |
| 722 if accumulator: | |
| 723 yield accumulator | |
| 724 | |
| 725 | |
| 726 class ResetableTimeout(object): | |
| 727 """A resetable timeout that acts as a float. | |
| 728 | |
| 729 At each reset, the timeout is increased so that it still has the equivalent | |
| 730 of the original timeout value, but according to 'now' at the time of the | |
| 731 reset. | |
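| For example, a 75s timeout that is reset 10s in grows to 85s in total, which | |
| still leaves 75s from the moment of the reset. | |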
| 732 """ | |
| 733 def __init__(self, timeout): | |
| 734 assert timeout >= 0. | |
| 735 self.timeout = float(timeout) | |
| 736 self.last_reset = time.time() | |
| 737 | |
| 738 def reset(self): | |
| 739 """Respendish the timeout.""" | |
| 740 now = time.time() | |
| 741 self.timeout += max(0., now - self.last_reset) | |
| 742 self.last_reset = now | |
| 743 return now | |
| 744 | |
| 745 @staticmethod | |
| 746 def __bool__(): | |
| 747 return True | |
| 748 | |
| 749 def __float__(self): | |
| 750 """To be used as a timeout value for a function call.""" | |
| 751 return self.timeout | |
| 752 | |
| 753 | |
| 754 class GoogleTestRunner(object): | |
| 755 """Immutable settings to run many test cases in a loop.""" | |
| 756 def __init__( | |
| 757 self, | |
| 758 cmd, | |
| 759 cwd_dir, | |
| 760 timeout, | |
| 761 progress, | |
| 762 retries, | |
| 763 decider, | |
| 764 verbose, | |
| 765 add_task, | |
| 766 add_serial_task, | |
| 767 filter_results): | |
| 768 """Defines how to run a googletest executable. | |
| 769 | |
| 770 Arguments: | |
| 771 - cmd: command line to start with. | |
| 772 - cwd_dir: directory to start the app in. | |
| 773 - timeout: timeout while waiting for output. | |
| 774 - progress: object to present the user with status updates. | |
| 775 - retries: number of allowed retries. For example if 2, the test case will | |
| 776 be tried 3 times in total. | |
| 777 - decider: object to decide if the run should be stopped early. | |
| 778 - verbose: unconditionally prints output. | |
| 779 - add_task: function to add the task back when failing, for retry. | |
| 780 - add_serial_task: function to add the task back when failing too often so | |
| 781 it should be run serially. | |
| 782 - filter_results: optional function to filter out extraneous test cases that | |
| 783 were run without our consent. | |
| 784 """ | |
| 785 self.cmd = cmd[:] | |
| 786 self.cwd_dir = cwd_dir | |
| 787 self.timeout = timeout | |
| 788 self.progress = progress | |
| 789 self.retries = retries | |
| 790 self.decider = decider | |
| 791 self.verbose = verbose | |
| 792 self.add_task = add_task | |
| 793 self.add_serial_task = add_serial_task | |
| 794 self.filter_results = filter_results or (lambda x: x) | |
| 795 # It is important to remove the shard environment variables since it could | |
| 796 # conflict with --gtest_filter. | |
| 797 self.env = setup_gtest_env() | |
| 798 | |
| 799 def map(self, priority, test_cases, try_count): | |
| 800 """Traces a single test case and returns its output. | |
| 801 | |
| 802 try_count is 0 based, the original try is 0. | |
| 803 """ | |
| 804 if self.decider.should_stop(): | |
| 805 raise StopIteration() | |
| 806 cmd = self.cmd + ['--gtest_filter=%s' % ':'.join(test_cases)] | |
| 807 if '--gtest_print_time' not in cmd: | |
| 808 cmd.append('--gtest_print_time') | |
| 809 proc = Popen( | |
| 810 cmd, | |
| 811 cwd=self.cwd_dir, | |
| 812 stdout=subprocess.PIPE, | |
| 813 stderr=subprocess.STDOUT, | |
| 814 env=self.env) | |
| 815 | |
| 816 # Use an intelligent timeout that can be reset. The idea is simple: the | |
| 817 # timeout is set to the value of the timeout for a single test case. | |
| 818 # Every time a test case is parsed, the timeout is reset to its full value. | |
| 819 # proc.yield_any() uses float() to extract the instantaneous value of | |
| 820 # 'timeout'. | |
| 821 timeout = ResetableTimeout(self.timeout) | |
| 822 | |
| 823 # Create a pipeline of generators. | |
| 824 gen_lines = convert_to_lines(data for _, data in proc.yield_any(timeout)) | |
| 825 # It needs to be valid utf-8 otherwise it can't be stored. | |
| 826 # TODO(maruel): Be more intelligent than decoding to ascii. | |
| 827 gen_lines_utf8 = ( | |
| 828 line.decode('ascii', 'ignore').encode('utf-8') for line in gen_lines) | |
| 829 gen_test_cases = process_output(gen_lines_utf8, test_cases) | |
| 830 last_timestamp = proc.start | |
| 831 got_failure_at_least_once = False | |
| 832 results = [] | |
| 833 for i in self.filter_results(gen_test_cases): | |
| 834 results.append(i) | |
| 835 now = timeout.reset() | |
| 836 test_case_has_passed = (i['returncode'] == 0) | |
| 837 if i['duration'] is None: | |
| 838 assert not test_case_has_passed | |
| 839 # Do not notify self.decider, because an early crash in a large cluster | |
| 840 # could cause the test to quit early. | |
| 841 else: | |
| 842 i['duration'] = max(i['duration'], now - last_timestamp) | |
| 843 # A new test_case completed. | |
| 844 self.decider.got_result(test_case_has_passed) | |
| 845 | |
| 846 need_to_retry = not test_case_has_passed and try_count < self.retries | |
| 847 got_failure_at_least_once |= not test_case_has_passed | |
| 848 last_timestamp = now | |
| 849 | |
| 850 # Create the line to print out. | |
| 851 if i['duration'] is not None: | |
| 852 duration = '(%.2fs)' % i['duration'] | |
| 853 else: | |
| 854 duration = '<unknown>' | |
| 855 if try_count: | |
| 856 line = '%s %s - retry #%d' % (i['test_case'], duration, try_count) | |
| 857 else: | |
| 858 line = '%s %s' % (i['test_case'], duration) | |
| 859 if self.verbose or not test_case_has_passed or try_count > 0: | |
| 860 # Print output in one of three cases: | |
| 861 # - --verbose was specified. | |
| 862 # - The test failed. | |
| 863 # - This wasn't the first attempt (this is needed so the test parser can | |
| 864 # detect that a test has been successfully retried). | |
| 865 if i['output']: | |
| 866 line += '\n' + i['output'] | |
| 867 self.progress.update_item(line, index=1, size=int(need_to_retry)) | |
| 868 | |
| 869 if need_to_retry: | |
| 870 priority = self._retry(priority, i['test_case'], try_count) | |
| 871 | |
| 872 # Delay yielding when only one test case is running, in case of a | |
| 873 # crash-after-succeed. | |
| 874 if len(test_cases) > 1: | |
| 875 yield i | |
| 876 | |
| 877 if proc.returncode and not got_failure_at_least_once: | |
| 878 if results and len(test_cases) == 1: | |
| 879 # Crash after pass. | |
| 880 results[-1]['returncode'] = proc.returncode | |
| 881 | |
| 882 if try_count < self.retries: | |
| 883 # This is tricky: one of the test cases failed but each printed that it | |
| 884 # succeeded! Retry them *all* individually. | |
| 885 if not self.verbose and not try_count: | |
| 886 # Print all the output as one shot when not verbose to be sure the | |
| 887 # potential stack trace is printed. | |
| 888 output = ''.join(i['output'] for i in results) | |
| 889 self.progress.update_item(output, raw=True) | |
| 890 for i in results: | |
| 891 priority = self._retry(priority, i['test_case'], try_count) | |
| 892 self.progress.update_item('', size=1) | |
| 893 | |
| 894 # Only yield once the process completed when there is only one test case as | |
| 895 # a safety precaution. | |
| 896 if results and len(test_cases) == 1: | |
| 897 yield results[-1] | |
| 898 | |
| 899 def _retry(self, priority, test_case, try_count): | |
| 900 """Adds back the same task again only if relevant. | |
| 901 | |
| 902 It may add it either at a lower (i.e. higher value) priority or at the end of | |
| 903 the serially executed list. | |
| 904 """ | |
| 905 if try_count + 1 < self.retries: | |
| 906 # The test failed and needs to be retried normally. | |
| 907 # Leave a buffer of ~40 test cases before retrying. | |
| 908 priority += 40 | |
| 909 self.add_task(priority, self.map, priority, [test_case], try_count + 1) | |
| 910 else: | |
| 911 # This test only has one retry left, so the final retry should be | |
| 912 # done serially. | |
| 913 self.add_serial_task( | |
| 914 priority, self.map, priority, [test_case], try_count + 1) | |
| 915 return priority | |
| 916 | |
| 917 | |
| 918 class ChromiumGoogleTestRunner(GoogleTestRunner): | |
| 919 def __init__(self, *args, **kwargs): | |
| 920 super(ChromiumGoogleTestRunner, self).__init__( | |
| 921 *args, filter_results=chromium_filter_pre_tests, **kwargs) | |
| 922 | |
| 923 | |
| 924 def get_test_cases( | |
| 925 cmd, cwd, whitelist, blacklist, index, shards, seed, disabled, fails, flaky, | |
| 926 manual): | |
| 927 """Returns the filtered list of test cases. | |
| 928 | |
| 929 This is done synchronously. | |
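| Returns None when the executable failed to even list its test cases. | |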
| 930 """ | |
| 931 try: | |
| 932 # List all the test cases if a whitelist is used. | |
| 933 tests = chromium_list_test_cases( | |
| 934 cmd, | |
| 935 cwd, | |
| 936 index=index, | |
| 937 shards=shards, | |
| 938 seed=seed, | |
| 939 disabled=disabled, | |
| 940 fails=fails, | |
| 941 flaky=flaky, | |
| 942 pre=False, | |
| 943 manual=manual) | |
| 944 except Failure, e: | |
| 945 print('Failed to list test cases. This means the test executable is so ' | |
| 946 'broken that it failed to start and enumerate its test cases.\n\n' | |
| 947 'An example of a potential problem causing this is a Windows API ' | |
| 948 'function not available on this version of Windows.') | |
| 949 print(e.args[0]) | |
| 950 return None | |
| 951 | |
| 952 if shards: | |
| 953 # This is necessary for Swarm log parsing. | |
| 954 print('Note: This is test shard %d of %d.' % (index+1, shards)) | |
| 955 | |
| 956 # Filters the test cases with the two lists. | |
| 957 if blacklist: | |
| 958 tests = [ | |
| 959 t for t in tests if not any(fnmatch.fnmatch(t, s) for s in blacklist) | |
| 960 ] | |
| 961 if whitelist: | |
| 962 tests = [ | |
| 963 t for t in tests if any(fnmatch.fnmatch(t, s) for s in whitelist) | |
| 964 ] | |
| 965 logging.info('Found %d test cases in %s' % (len(tests), ' '.join(cmd))) | |
| 966 return tests | |
| 967 | |
| 968 | |
| 969 def dump_results_as_json(result_file, results): | |
| 970 """Write the results out to a json file.""" | |
| 971 base_path = os.path.dirname(result_file) | |
| 972 if base_path and not os.path.isdir(base_path): | |
| 973 os.makedirs(base_path) | |
| 974 with open(result_file, 'wb') as f: | |
| 975 json.dump(results, f, sort_keys=True, indent=2) | |
| 976 | |
| 977 | |
| 978 def dump_results_as_xml(gtest_output, results, now): | |
| 979 """Write the results out to a xml file in google-test compatible format.""" | |
| 980 # TODO(maruel): Print all the test cases, including the ones that weren't run | |
| 981 # and the retries. | |
| 982 test_suites = {} | |
| 983 for test_case, result in results['test_cases'].iteritems(): | |
| 984 suite, case = test_case.split('.', 1) | |
| 985 test_suites.setdefault(suite, {})[case] = result[0] | |
| 986 | |
| 987 with open(gtest_output, 'wb') as f: | |
| 988 # Sanity warning: hand-rolling XML. What could possibly go wrong? | |
| 989 f.write('<?xml version="1.0" ?>\n') | |
| 990 # TODO(maruel): Fill in the fields nobody reads anyway. | |
| 991 # disabled="%d" errors="%d" failures="%d" | |
| 992 f.write( | |
| 993 ('<testsuites name="AllTests" tests="%d" time="%f" timestamp="%s">\n') | |
| 994 % (results['expected'], results['duration'], now)) | |
| 995 for suite_name, suite in test_suites.iteritems(): | |
| 996 # TODO(maruel): disabled="0" errors="0" failures="0" time="0" | |
| 997 f.write('<testsuite name="%s" tests="%d">\n' % (suite_name, len(suite))) | |
| 998 for case_name, case in suite.iteritems(): | |
| 999 if case['returncode'] == 0: | |
| 1000 f.write( | |
| 1001 ' <testcase classname="%s" name="%s" status="run" time="%f"/>\n' % | |
| 1002 (suite_name, case_name, case['duration'])) | |
| 1003 else: | |
| 1004 f.write( | |
| 1005 ' <testcase classname="%s" name="%s" status="run" time="%f">\n' % | |
| 1006 (suite_name, case_name, (case['duration'] or 0))) | |
| 1007 # While at it, hand-roll CDATA escaping too. | |
| 1008 output = ']]><![CDATA['.join((case['output'] or '').split(']]>')) | |
| 1009 # TODO(maruel): message="" type="" | |
| 1010 f.write('<failure><![CDATA[%s]]></failure></testcase>\n' % output) | |
| 1011 f.write('</testsuite>\n') | |
| 1012 f.write('</testsuites>') | |
| 1013 | |
| 1014 | |
| 1015 def append_gtest_output_to_xml(final_xml, filepath): | |
| 1016 """Combines the shard xml file with the final xml file.""" | |
| 1017 try: | |
| 1018 with open(filepath) as shard_xml_file: | |
| 1019 shard_xml = minidom.parse(shard_xml_file) | |
| 1020 except xml.parsers.expat.ExpatError as e: | |
| 1021 logging.error('Failed to parse %s: %s', filepath, e) | |
| 1022 return final_xml | |
| 1023 except IOError as e: | |
| 1024 logging.error('Failed to load %s: %s', filepath, e) | |
| 1025 # If the shard crashed, gtest will not have generated an xml file. | |
| 1026 return final_xml | |
| 1027 | |
| 1028 if not final_xml: | |
| 1029 # Our final xml is empty, let's prepopulate it with the first one we see. | |
| 1030 return shard_xml | |
| 1031 | |
| 1032 final_testsuites_by_name = dict( | |
| 1033 (suite.getAttribute('name'), suite) | |
| 1034 for suite in final_xml.documentElement.getElementsByTagName('testsuite')) | |
| 1035 | |
| 1036 for testcase in shard_xml.documentElement.getElementsByTagName('testcase'): | |
| 1037 # Don't bother updating the final xml if there is no data. | |
| 1038 status = testcase.getAttribute('status') | |
| 1039 if status == 'notrun': | |
| 1040 continue | |
| 1041 | |
| 1042 name = testcase.getAttribute('name') | |
| 1043 # Look in our final xml to see if it's there. | |
| 1044 to_remove = [] | |
| 1045 final_testsuite = final_testsuites_by_name[ | |
| 1046 testcase.getAttribute('classname')] | |
| 1047 for final_testcase in final_testsuite.getElementsByTagName('testcase'): | |
| 1048 # Trim all the notrun testcase instances to add the new instance there. | |
| 1049 # This is to make sure it works properly in case of a testcase being run | |
| 1050 # multiple times. | |
| 1051 if (final_testcase.getAttribute('name') == name and | |
| 1052 final_testcase.getAttribute('status') == 'notrun'): | |
| 1053 to_remove.append(final_testcase) | |
| 1054 | |
| 1055 for item in to_remove: | |
| 1056 final_testsuite.removeChild(item) | |
| 1057 # Reparent the XML node. | |
| 1058 final_testsuite.appendChild(testcase) | |
| 1059 | |
| 1060 return final_xml | |
| 1061 | |
| 1062 | |
| 1063 def running_serial_warning(): | |
| 1064 return ['*****************************************************', | |
| 1065 '*****************************************************', | |
| 1066 '*****************************************************', | |
| 1067 'WARNING: The remaining tests are going to be retried', | |
| 1068 'serially. All tests should be isolated and be able to pass', | |
| 1069 'regardless of what else is running.', | |
| 1070 'If you see a test that can only pass serially, that test is', | |
| 1071 'probably broken and should be fixed.', | |
| 1072 '*****************************************************', | |
| 1073 '*****************************************************', | |
| 1074 '*****************************************************'] | |
| 1075 | |
| 1076 | |
| 1077 def gen_gtest_output_dir(cwd, gtest_output): | |
| 1078 """Converts gtest_output to an actual path that can be used in parallel. | |
| 1079 | |
| 1080 Returns a 'corrected' gtest_output value. | |
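| For example, 'xml' maps to <cwd>/test_detail.xml, 'xml:sub/' to | |
| <cwd>/sub/test_detail.xml and 'xml:out.xml' to <cwd>/out.xml. | |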
| 1081 """ | |
| 1082 if not gtest_output.startswith('xml'): | |
| 1083 raise Failure('Can\'t parse --gtest_output=%s' % gtest_output) | |
| 1084 # Figure out the result filepath in case we can't parse it, it'd be | |
| 1085 # annoying to error out *after* running the tests. | |
| 1086 if gtest_output == 'xml': | |
| 1087 gtest_output = os.path.join(cwd, 'test_detail.xml') | |
| 1088 else: | |
| 1089 match = re.match(r'xml\:(.+)', gtest_output) | |
| 1090 if not match: | |
| 1091 raise Failure('Can\'t parse --gtest_output=%s' % gtest_output) | |
| 1092 # If match.group(1) is an absolute path, os.path.join() will do the right | |
| 1093 # thing. | |
| 1094 if match.group(1).endswith((os.path.sep, '/')): | |
| 1095 gtest_output = os.path.join(cwd, match.group(1), 'test_detail.xml') | |
| 1096 else: | |
| 1097 gtest_output = os.path.join(cwd, match.group(1)) | |
| 1098 | |
| 1099 base_path = os.path.dirname(gtest_output) | |
| 1100 if base_path and not os.path.isdir(base_path): | |
| 1101 os.makedirs(base_path) | |
| 1102 | |
| 1103 # Emulate google-test's automatic increasing index number. | |
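| # e.g. test_detail.xml, then test_detail_0.xml, test_detail_1.xml and so on. | |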
| 1104 while True: | |
| 1105 try: | |
| 1106 # Creates a file exclusively. | |
| 1107 os.close(os.open(gtest_output, os.O_CREAT|os.O_EXCL|os.O_RDWR, 0666)) | |
| 1108 # It worked, we are done. | |
| 1109 return gtest_output | |
| 1110 except OSError: | |
| 1111 pass | |
| 1112 logging.debug('%s existed', gtest_output) | |
| 1113 base, ext = os.path.splitext(gtest_output) | |
| 1114 match = re.match(r'^(.+?_)(\d+)$', base) | |
| 1115 if match: | |
| 1116 base = match.group(1) + str(int(match.group(2)) + 1) | |
| 1117 else: | |
| 1118 base = base + '_0' | |
| 1119 gtest_output = base + ext | |
| 1120 | |
| 1121 | |
| 1122 def calc_cluster_default(num_test_cases, jobs): | |
| 1123 """Calculates a desired number for clusters depending on the number of test | |
| 1124 cases and parallel jobs. | |
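| For example, 200 test cases on 8 jobs gives 48 chunks, so clusters of 5; | |
| 10 test cases on 4 jobs gives clusters of 2. | |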
| 1125 """ | |
| 1126 if not num_test_cases: | |
| 1127 return 0 | |
| 1128 chunks = 6 * jobs | |
| 1129 if chunks >= num_test_cases: | |
| 1130 # Too many chunks, use 1~5 test cases per thread. Not enough to start | |
| 1131 # chunking. | |
| 1132 value = num_test_cases / jobs | |
| 1133 else: | |
| 1134 # Use chunks that are spread across threads. | |
| 1135 value = (num_test_cases + chunks - 1) / chunks | |
| 1136 # Limit to 10 test cases per cluster. | |
| 1137 return min(10, max(1, value)) | |
| 1138 | |
| 1139 | |
| 1140 def run_test_cases( | |
| 1141 cmd, cwd, test_cases, jobs, timeout, clusters, retries, run_all, | |
| 1142 max_failures, no_cr, gtest_output, result_file, verbose): | |
| 1143 """Runs test cases in parallel. | |
| 1144 | |
| 1145 Arguments: | |
| 1146 - cmd: command to run. | |
| 1147 - cwd: working directory. | |
| 1148 - test_cases: list of preprocessed test cases to run. | |
| 1149 - jobs: number of parallel execution threads to use. | |
| 1150 - timeout: individual test case timeout. Modulated when used with | |
| 1151 clustering. | |
| 1152 - clusters: number of test cases to lump together in a single execution. 0 | |
| 1153 means the default automatic value which depends on len(test_cases) and | |
| 1154 jobs. Capped to len(test_cases) / jobs. | |
| 1155 - retries: number of times a test case can be retried. | |
| 1156 - run_all: If true, do not early return even if all test cases fail. | |
| 1157 - max_failures is the absolute maximum number of tolerated failures or None. | |
| 1158 - no_cr: makes output friendly to piped logs. | |
| 1159 - gtest_output: saves results as xml. | |
| 1160 - result_file: saves results as json. | |
| 1161 - verbose: print more details. | |
| 1162 | |
| 1163 It may run a subset of the test cases if too many test cases failed, as | |
| 1164 determined with max_failures, retries and run_all. | |
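| Returns 0 on success and 1 when any test case failed, went missing, or the run | |
| was stopped early. | |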
| 1165 """ | |
| 1166 assert 0 <= retries <= 100000 | |
| 1167 if not test_cases: | |
| 1168 return 0 | |
| 1169 if run_all: | |
| 1170 decider = RunAll() | |
| 1171 else: | |
| 1172 # If 10% of test cases fail, just too bad. | |
| 1173 decider = RunSome(len(test_cases), retries, 2, 0.1, max_failures) | |
| 1174 | |
| 1175 if not clusters: | |
| 1176 clusters = calc_cluster_default(len(test_cases), jobs) | |
| 1177 else: | |
| 1178 # Limit the value. | |
| 1179 clusters = max(min(clusters, len(test_cases) / jobs), 1) | |
| 1180 | |
| 1181 logging.debug('%d test cases with clusters of %d', len(test_cases), clusters) | |
| 1182 | |
| 1183 if gtest_output: | |
| 1184 gtest_output = gen_gtest_output_dir(cwd, gtest_output) | |
| 1185 columns = [('index', 0), ('size', len(test_cases))] | |
| 1186 progress = threading_utils.Progress(columns) | |
| 1187 progress.use_cr_only = not no_cr | |
| 1188 serial_tasks = threading_utils.QueueWithProgress(progress) | |
| 1189 | |
| 1190 def add_serial_task(priority, func, *args, **kwargs): | |
| 1191 """Adds a serial task, to be executed later.""" | |
| 1192 assert isinstance(priority, int) | |
| 1193 assert callable(func) | |
| 1194 serial_tasks.put((priority, func, args, kwargs)) | |
| 1195 | |
| 1196 with threading_utils.ThreadPoolWithProgress( | |
| 1197 progress, jobs, jobs, len(test_cases)) as pool: | |
| 1198 runner = ChromiumGoogleTestRunner( | |
| 1199 cmd, | |
| 1200 cwd, | |
| 1201 timeout, | |
| 1202 progress, | |
| 1203 retries, | |
| 1204 decider, | |
| 1205 verbose, | |
| 1206 pool.add_task, | |
| 1207 add_serial_task) | |
| 1208 function = runner.map | |
| 1209 # Cluster the test cases right away. | |
| 1210 for i in xrange((len(test_cases) + clusters - 1) / clusters): | |
| 1211 cluster = test_cases[i*clusters : (i+1)*clusters] | |
| 1212 pool.add_task(i, function, i, cluster, 0) | |
| 1213 results = pool.join() | |
| 1214 | |
| 1215 # Retry any failed tests serially. | |
| 1216 if not serial_tasks.empty(): | |
| 1217 progress.update_item('\n'.join(running_serial_warning()), raw=True) | |
| 1218 progress.print_update() | |
| 1219 | |
| 1220 while not serial_tasks.empty(): | |
| 1221 _priority, func, args, kwargs = serial_tasks.get() | |
| 1222 for out in func(*args, **kwargs): | |
| 1223 results.append(out) | |
| 1224 serial_tasks.task_done() | |
| 1225 progress.print_update() | |
| 1226 | |
| 1227 # Call join since that is a standard call once a queue has been emptied. | |
| 1228 serial_tasks.join() | |
| 1229 | |
| 1230 duration = time.time() - pool.tasks.progress.start | |
| 1231 | |
| 1232 cleaned = {} | |
| 1233 for i in results: | |
| 1234 cleaned.setdefault(i['test_case'], []).append(i) | |
| 1235 results = cleaned | |
| 1236 | |
| 1237 # Total time taken to run each test case. | |
| 1238 test_case_duration = dict( | |
| 1239 (test_case, sum((i.get('duration') or 0) for i in item)) | |
| 1240 for test_case, item in results.iteritems()) | |
| 1241 | |
| 1242 # Classify the results | |
| 1243 success = [] | |
| 1244 flaky = [] | |
| 1245 fail = [] | |
| 1246 nb_runs = 0 | |
| 1247 for test_case in sorted(results): | |
| 1248 items = results[test_case] | |
| 1249 nb_runs += len(items) | |
| 1250 if not any(i['returncode'] == 0 for i in items): | |
| 1251 fail.append(test_case) | |
| 1252 elif len(items) > 1 and any(i['returncode'] == 0 for i in items): | |
| 1253 flaky.append(test_case) | |
| 1254 elif len(items) == 1 and items[0]['returncode'] == 0: | |
| 1255 success.append(test_case) | |
| 1256 else: | |
| 1257 # The test never ran. | |
| 1258 assert False, items | |
| 1259 missing = sorted(set(test_cases) - set(success) - set(flaky) - set(fail)) | |
| 1260 | |
| 1261 saved = { | |
| 1262 'test_cases': results, | |
| 1263 'expected': len(test_cases), | |
| 1264 'success': success, | |
| 1265 'flaky': flaky, | |
| 1266 'fail': fail, | |
| 1267 'missing': missing, | |
| 1268 'duration': duration, | |
| 1269 } | |
| 1270 if result_file: | |
| 1271 dump_results_as_json(result_file, saved) | |
| 1272 if gtest_output: | |
| 1273 dump_results_as_xml(gtest_output, saved, datetime.datetime.now()) | |
| 1274 sys.stdout.write('\n') | |
| 1275 if not results: | |
| 1276 return 1 | |
| 1277 | |
| 1278 if flaky: | |
| 1279 print('Flaky tests:') | |
| 1280 for test_case in sorted(flaky): | |
| 1281 items = results[test_case] | |
| 1282 print(' %s (tried %d times)' % (test_case, len(items))) | |
| 1283 | |
| 1284 if fail: | |
| 1285 print('Failed tests:') | |
| 1286 for test_case in sorted(fail): | |
| 1287 print(' %s' % test_case) | |
| 1288 | |
| 1289 if not decider.should_stop() and missing: | |
| 1290 print('Missing tests:') | |
| 1291 for test_case in sorted(missing): | |
| 1292 print(' %s' % test_case) | |
| 1293 | |
| 1294 print('Summary:') | |
| 1295 if decider.should_stop(): | |
| 1296 print(' ** STOPPED EARLY due to high failure rate **') | |
| 1297 output = [ | |
| 1298 ('Success', success), | |
| 1299 ('Flaky', flaky), | |
| 1300 ('Fail', fail), | |
| 1301 ] | |
| 1302 if missing: | |
| 1303 output.append(('Missing', missing)) | |
| 1304 total_expected = len(test_cases) | |
| 1305 for name, items in output: | |
| 1306 number = len(items) | |
| 1307 print( | |
| 1308 ' %7s: %4d %6.2f%% %7.2fs' % ( | |
| 1309 name, | |
| 1310 number, | |
| 1311 number * 100. / total_expected, | |
| 1312 sum(test_case_duration.get(item, 0) for item in items))) | |
| 1313 print(' %.2fs Done running %d tests with %d executions. %.2f test/s' % ( | |
| 1314 duration, | |
| 1315 len(results), | |
| 1316 nb_runs, | |
| 1317 nb_runs / duration if duration else 0)) | |
| 1318 return int(bool(fail) or decider.stopped or bool(missing)) | |
| 1319 | |
| 1320 | |
| 1321 class OptionParserWithLogging(tools.OptionParserWithLogging): | |
| 1322 def __init__(self, **kwargs): | |
| 1323 tools.OptionParserWithLogging.__init__( | |
| 1324 self, | |
| 1325 log_file=os.environ.get('RUN_TEST_CASES_LOG_FILE', ''), | |
| 1326 **kwargs) | |
| 1327 | |
| 1328 | |
| 1329 class OptionParserWithTestSharding(OptionParserWithLogging): | |
| 1330 """Adds automatic handling of test sharding""" | |
| 1331 def __init__(self, **kwargs): | |
| 1332 OptionParserWithLogging.__init__(self, **kwargs) | |
| 1333 | |
| 1334 def as_digit(variable, default): | |
| 1335 return int(variable) if variable.isdigit() else default | |
| 1336 | |
| 1337 group = optparse.OptionGroup(self, 'Which shard to select') | |
| 1338 group.add_option( | |
| 1339 '-I', '--index', | |
| 1340 type='int', | |
| 1341 default=as_digit(os.environ.get('GTEST_SHARD_INDEX', ''), None), | |
| 1342 help='Shard index to select') | |
| 1343 group.add_option( | |
| 1344 '-S', '--shards', | |
| 1345 type='int', | |
| 1346 default=as_digit(os.environ.get('GTEST_TOTAL_SHARDS', ''), None), | |
| 1347 help='Total number of shards to calculate from the --index to select') | |
| 1348 self.add_option_group(group) | |
| 1349 | |
| 1350 def parse_args(self, *args, **kwargs): | |
| 1351 options, args = OptionParserWithLogging.parse_args(self, *args, **kwargs) | |
| 1352 if bool(options.shards) != bool(options.index is not None): | |
| 1353 self.error('Use both --index X --shards Y or none of them') | |
| 1354 return options, args | |
| 1355 | |
| 1356 | |
| 1357 class OptionParserWithTestShardingAndFiltering(OptionParserWithTestSharding): | |
| 1358 """Adds automatic handling of test sharding and filtering.""" | |
| 1359 def __init__(self, *args, **kwargs): | |
| 1360 OptionParserWithTestSharding.__init__(self, *args, **kwargs) | |
| 1361 | |
| 1362 group = optparse.OptionGroup(self, 'Which test cases to select') | |
| 1363 group.add_option( | |
| 1364 '-w', '--whitelist', | |
| 1365 default=[], | |
| 1366 action='append', | |
| 1367 help='filter to apply to test cases to run, wildcard-style, defaults ' | |
| 1368 'to all tests') | |
| 1369 group.add_option( | |
| 1370 '-b', '--blacklist', | |
| 1371 default=[], | |
| 1372 action='append', | |
| 1373 help='filter to apply to test cases to skip, wildcard-style, defaults ' | |
| 1374 'to no test') | |
| 1375 group.add_option( | |
| 1376 '-T', '--test-case-file', | |
| 1377 help='File containing the exact list of test cases to run') | |
| 1378 group.add_option( | |
| 1379 '--gtest_filter', | |
| 1380 default=os.environ.get('GTEST_FILTER', ''), | |
| 1381 help='Select test cases like google-test does, separated with ":"') | |
| 1382 group.add_option( | |
| 1383 '--seed', | |
| 1384 type='int', | |
| 1385 default=os.environ.get('GTEST_RANDOM_SEED', '1'), | |
| 1386 help='Deterministically shuffle the test list if non-0. default: ' | |
| 1387 '%default') | |
| 1388 group.add_option( | |
| 1389 '-d', '--disabled', | |
| 1390 action='store_true', | |
| 1391 default=int(os.environ.get('GTEST_ALSO_RUN_DISABLED_TESTS', '0')), | |
| 1392 help='Include DISABLED_ tests') | |
| 1393 group.add_option( | |
| 1394 '--gtest_also_run_disabled_tests', | |
| 1395 action='store_true', | |
| 1396 dest='disabled', | |
| 1397 help='same as --disabled') | |
| 1398 self.add_option_group(group) | |
| 1399 | |
| 1400 group = optparse.OptionGroup( | |
| 1401 self, 'Which test cases to select; chromium-specific') | |
| 1402 group.add_option( | |
| 1403 '-f', '--fails', | |
| 1404 action='store_true', | |
| 1405 help='Include FAILS_ tests') | |
| 1406 group.add_option( | |
| 1407 '-F', '--flaky', | |
| 1408 action='store_true', | |
| 1409 help='Include FLAKY_ tests') | |
| 1410 group.add_option( | |
| 1411 '-m', '--manual', | |
| 1412 action='store_true', | |
| 1413 help='Include MANUAL_ tests') | |
| 1414 group.add_option( | |
| 1415 '--run-manual', | |
| 1416 action='store_true', | |
| 1417 dest='manual', | |
| 1418 help='same as --manual') | |
| 1419 self.add_option_group(group) | |
| 1420 | |
| 1421 def parse_args(self, *args, **kwargs): | |
| 1422 options, args = OptionParserWithTestSharding.parse_args( | |
| 1423 self, *args, **kwargs) | |
| 1424 | |
| 1425 if options.gtest_filter: | |
| 1426 # Override any other option. | |
| 1427 # Based on UnitTestOptions::FilterMatchesTest() in | |
| 1428 # http://code.google.com/p/googletest/source/browse/#svn%2Ftrunk%2Fsrc | |
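| # e.g. '--gtest_filter=A.*:B.*-A.Flaky' gives whitelist ['A.*', 'B.*'] and | |
| # blacklist ['A.Flaky']. | |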
| 1429 if '-' in options.gtest_filter: | |
| 1430 options.whitelist, options.blacklist = options.gtest_filter.split('-', | |
| 1431 1) | |
| 1432 else: | |
| 1433 options.whitelist = options.gtest_filter | |
| 1434 options.blacklist = '' | |
| 1435 options.whitelist = [i for i in options.whitelist.split(':') if i] | |
| 1436 options.blacklist = [i for i in options.blacklist.split(':') if i] | |
| 1437 | |
| 1438 return options, args | |
| 1439 | |
| 1440 @staticmethod | |
| 1441 def process_gtest_options(cmd, cwd, options): | |
| 1442 """Grabs the test cases.""" | |
| 1443 if options.test_case_file: | |
| 1444 with open(options.test_case_file, 'r') as f: | |
| 1445 # Do not shuffle or alter the file in any way in that case except to | |
| 1446 # strip whitespaces. | |
| 1447 return [l for l in (l.strip() for l in f) if l] | |
| 1448 else: | |
| 1449 return get_test_cases( | |
| 1450 cmd, | |
| 1451 cwd, | |
| 1452 options.whitelist, | |
| 1453 options.blacklist, | |
| 1454 options.index, | |
| 1455 options.shards, | |
| 1456 options.seed, | |
| 1457 options.disabled, | |
| 1458 options.fails, | |
| 1459 options.flaky, | |
| 1460 options.manual) | |
| 1461 | |
| 1462 | |
| 1463 class OptionParserTestCases(OptionParserWithTestShardingAndFiltering): | |
| 1464 def __init__(self, *args, **kwargs): | |
| 1465 OptionParserWithTestShardingAndFiltering.__init__(self, *args, **kwargs) | |
| 1466 self.add_option( | |
| 1467 '-j', '--jobs', | |
| 1468 type='int', | |
| 1469 default=threading_utils.num_processors(), | |
| 1470 help='Number of parallel jobs; default=%default') | |
| 1471 self.add_option( | |
| 1472 '--use-less-jobs', | |
| 1473 action='store_const', | |
| 1474 const=max(1, threading_utils.num_processors() / 2), | |
| 1475 dest='jobs', | |
| 1476 help='Starts fewer parallel jobs than the default, used to help reduce ' | |
| 1477 'contention between threads if all the tests are very CPU heavy.') | |
| 1478 self.add_option( | |
| 1479 '-t', '--timeout', | |
| 1480 type='int', | |
| 1481 default=75, | |
| 1482 help='Timeout for a single test case, in seconds; default: %default') | |
| 1483 self.add_option( | |
| 1484 '--clusters', | |
| 1485 type='int', | |
| 1486 help='Number of test cases to cluster together, clamped to ' | |
| 1487 'len(test_cases) / jobs; the default is automatic') | |
| 1488 | |
| 1489 | |
| 1490 def process_args(argv): | |
| 1491 parser = OptionParserTestCases( | |
| 1492 usage='%prog <options> [gtest]', | |
| 1493 verbose=int(os.environ.get('ISOLATE_DEBUG', 0))) | |
| 1494 parser.add_option( | |
| 1495 '--run-all', | |
| 1496 action='store_true', | |
| 1497 help='Do not fail early when a large number of test cases fail') | |
| 1498 parser.add_option( | |
| 1499 '--max-failures', type='int', | |
| 1500 help='Limit the number of failures before aborting') | |
| 1501 parser.add_option( | |
| 1502 '--retries', type='int', default=2, | |
| 1503 help='Number of times each test case should be retried in case of ' | |
| 1504 'failure.') | |
| 1505 parser.add_option( | |
| 1506 '--no-dump', | |
| 1507 action='store_true', | |
| 1508 help='do not generate a .run_test_cases file') | |
| 1509 parser.add_option( | |
| 1510 '--no-cr', | |
| 1511 action='store_true', | |
| 1512 help='Use LF instead of CR for status progress') | |
| 1513 parser.add_option( | |
| 1514 '--result', | |
| 1515 help='Override the default name of the generated .run_test_cases file') | |
| 1516 | |
| 1517 group = optparse.OptionGroup(parser, 'google-test compatibility flags') | |
| 1518 group.add_option( | |
| 1519 '--gtest_list_tests', | |
| 1520 action='store_true', | |
| 1521 help='List all the test cases unformatted. Keeps compatibility with the ' | |
| 1522 'executable itself.') | |
| 1523 group.add_option( | |
| 1524 '--gtest_output', | |
| 1525 default=os.environ.get('GTEST_OUTPUT', ''), | |
| 1526 help='XML output to generate') | |
| 1527 parser.add_option_group(group) | |
| 1528 | |
| 1529 options, args = parser.parse_args(argv) | |
| 1530 | |
| 1531 if not args: | |
| 1532 parser.error( | |
| 1533 'Please provide the executable to run. If you need fancy things ' | |
| 1534 'like xvfb, start this script from *inside* xvfb; it\'ll be much faster' | |
| 1535 '.') | |
| 1536 | |
| 1537 if options.run_all and options.max_failures is not None: | |
| 1538 parser.error('Use only one of --run-all or --max-failures') | |
| 1539 return parser, options, tools.fix_python_path(args) | |
| 1540 | |
| 1541 | |
| 1542 def main(argv): | |
| 1543 """CLI frontend to validate arguments.""" | |
| 1544 tools.disable_buffering() | |
| 1545 parser, options, cmd = process_args(argv) | |
| 1546 | |
| 1547 if options.gtest_list_tests: | |
| 1548 # Special case, return the output of the target unmodified. | |
| 1549 return subprocess.call(cmd + ['--gtest_list_tests']) | |
| 1550 | |
| 1551 cwd = os.getcwd() | |
| 1552 test_cases = parser.process_gtest_options(cmd, cwd, options) | |
| 1553 | |
| 1554 if options.no_dump: | |
| 1555 result_file = None | |
| 1556 else: | |
| 1557 result_file = options.result | |
| 1558 if not result_file: | |
| 1559 if cmd[0] == sys.executable: | |
| 1560 result_file = '%s.run_test_cases' % cmd[1] | |
| 1561 else: | |
| 1562 result_file = '%s.run_test_cases' % cmd[0] | |
| 1563 | |
| 1564 if not test_cases: | |
| 1565 # The fact of not running any test is considered a failure. This is to | |
| 1566 # prevent silent failure with an invalid --gtest_filter argument or because | |
| 1567 # of a misconfigured unit test. | |
| 1568 if test_cases is not None: | |
| 1569 print('Found no test to run') | |
| 1570 if result_file: | |
| 1571 dump_results_as_json(result_file, { | |
| 1572 'test_cases': [], | |
| 1573 'expected': 0, | |
| 1574 'success': [], | |
| 1575 'flaky': [], | |
| 1576 'fail': [], | |
| 1577 'missing': [], | |
| 1578 'duration': 0, | |
| 1579 }) | |
| 1580 return 1 | |
| 1581 | |
| 1582 if options.disabled: | |
| 1583 cmd.append('--gtest_also_run_disabled_tests') | |
| 1584 if options.manual: | |
| 1585 cmd.append('--run-manual') | |
| 1586 | |
| 1587 try: | |
| 1588 return run_test_cases( | |
| 1589 cmd, | |
| 1590 cwd, | |
| 1591 test_cases, | |
| 1592 options.jobs, | |
| 1593 options.timeout, | |
| 1594 options.clusters, | |
| 1595 options.retries, | |
| 1596 options.run_all, | |
| 1597 options.max_failures, | |
| 1598 options.no_cr, | |
| 1599 options.gtest_output, | |
| 1600 result_file, | |
| 1601 options.verbose) | |
| 1602 except Failure as e: | |
| 1603 print >> sys.stderr, e.args[0] | |
| 1604 return 1 | |
| 1605 | |
| 1606 | |
| 1607 if __name__ == '__main__': | |
| 1608 sys.exit(main(sys.argv[1:])) | |