tools/flakiness/find_flakiness.py - Issue 7688004: Added tools for finding and purging flaky tests

Side by Side Diff: tools/flakiness/find_flakiness.py

Issue 7688004: Added tools for finding and purging flaky tests (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk

Patch Set: Condensed into one file Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright (c) 2011 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Contains two functions that run different test cases and the same test

	7 case in parallel repeatedly to identify flaky tests.

	8 """

	9

	10

	11 import os

	12 import re

	13 import subprocess

	14 import time

	15

	16

	17 # Defaults for FindShardingFlakiness().

	18 FF_DATA_SUFFIX = '_flakies'

	19 FF_SLEEP_INTERVAL = 10.0

	20 FF_NUM_ITERATIONS = 100

	21 FF_SUPERVISOR_ARGS = ['-r3', '--random-seed']

	22

	23 # Defaults for FindUnaryFlakiness().

	24 FF_OUTPUT_SUFFIX = '_purges'

	25 FF_NUM_PROCS = 20

	26 FF_NUM_REPEATS = 10

	27 FF_TIMEOUT = 600

	28

	29

	30 def FindShardingFlakiness(test_path, data_path, supervisor_args):

	31 """Finds flaky test cases by sharding and running a test for the specified

	32 number of times. The data file is read at the beginning of each run to find

	33 the last known counts and is overwritten at the end of each run with the new

	34 counts. There is an optional sleep interval between each run so the script can

	35 be killed without losing the data, useful for overnight (or weekend!) runs.

	36 """

	37

	38 failed_tests = {}

	39 # Read a previously written data file.

	40 if os.path.exists(data_path):

	41 data_file = open(data_path, 'r')

	42 num_runs = int(data_file.readline().split(' ')[0])

	43 num_passes = int(data_file.readline().split(' ')[0])

	44 for line in data_file:

	45 if line:

	46 split_line = line.split(' -> ')

	47 failed_tests[split_line[0]] = int(split_line[1])

	48 data_file.close()

	49 # No data file found.

	50 else:

	51 num_runs = 0

	52 num_passes = 0

	53

	54 log_lines = False

	55 args = ['python', '../sharding_supervisor/sharding_supervisor.py']

	56 args.extend(supervisor_args + [test_path])

	57 proc = subprocess.Popen(args, stderr=subprocess.PIPE)

	58

	59 # Shard the test and collect failures.

	60 while True:

	61 line = proc.stderr.readline()

	62 if not line:

	63 if proc.poll() is not None:

	64 break

	65 continue

	66 print line.rstrip()

	67 if log_lines:

	68 line = line.rstrip()

	69 if line in failed_tests:

	70 failed_tests[line] += 1

	71 else:

	72 failed_tests[line] = 1

	73 elif line.find('FAILED TESTS:') >= 0:

	74 log_lines = True

	75 num_runs += 1

	76 if proc.returncode == 0:

	77 num_passes += 1

	78

	79 # Write the data file and print results.

	80 data_file = open(data_path, 'w')

	81 print '%i runs' % num_runs

	82 data_file.write('%i runs\n' % num_runs)

	83 print '%i passes' % num_passes

	84 data_file.write('%i passes\n' % num_passes)

	85 for (test, count) in failed_tests.iteritems():

	86 print '%s -> %i' % (test, count)

	87 data_file.write('%s -> %i\n' % (test, count))

	88 data_file.close()

	89

	90

	91 def FindUnaryFlakiness(test_path, output_path, num_procs, num_repeats, timeout):

	92 """Runs all the test cases in a given test in parallel with itself, to get at

	93 those that hold on to shared resources. The idea is that if a test uses a

	94 unary resource, then running many instances of this test will purge out some

	95 of them as failures or timeouts.

	96 """

	97

	98 test_name_regex = r'((\w+/)?\w+\.\w+(/\d+)?)'

	99 test_start = re.compile('\[\s+RUN\s+\] ' + test_name_regex)

	100 test_list = []

	101

	102 # Run the test to discover all the test cases.

	103 proc = subprocess.Popen([test_path], stdout=subprocess.PIPE)

	104 while True:

	105 line = proc.stdout.readline()

	106 if not line:

	107 if proc.poll() is not None:

	108 break

	109 continue

	110 print line.rstrip()

	111 results = test_start.search(line)

	112 if results:

	113 test_list.append(results.group(1))

	114

	115 failures = []

	116 index = 0

	117 total = len(test_list)

	118

	119 # Run each test case in parallel with itself.

	120 for test_name in test_list:

	121 num_fails = 0

	122 num_terminated = 0

	123 procs = []

	124 args = [test_path, '--gtest_filter=' + test_name,

	125 '--gtest_repeat=%i' % num_repeats]

	126 while len(procs) < num_procs:

	127 procs.append(subprocess.Popen(args))

	128 seconds = 0

	129 while procs:

	130 for proc in procs:

	131 if proc.poll() is not None:

	132 if proc.returncode != 0:

	133 ++num_fails

	134 procs.remove(proc)

	135 # Timeout exceeded, kill the remaining processes and make a note.

	136 if seconds > timeout:

	137 num_fails += len(procs)

	138 num_terminated = len(procs)

	139 while procs:

	140 procs.pop().terminate()

	141 time.sleep(1.0)

	142 seconds += 1

	143 if num_fails:

	144 line = '%s: %i failed' % (test_name, num_fails)

	145 if num_terminated:

	146 line += ' (%i terminated)' % num_terminated

	147 failures.append(line)

	148 print '%s (%i / %i): %i failed' % (test_name, index, total, num_fails)

	149 index += 1

	150 time.sleep(1.0)

	151

	152 # Print the results and write the data file.

	153 print failures

	154 data_file = open(output_path, 'w')

	155 for line in failures:

	156 data_file.write(line + '\n')

	157 data_file.close()

	158

	159

	160 def main():

	161 if not args:

	162 parser.error('You must specify a path to test!')

	163 if not os.path.exists(args[0]):

	164 parser.error('%s does not exist!' % args[0])

	165

	166 data_path = os.path.basename(args[0]) + FF_DATA_SUFFIX

	167 output_path = os.path.basename(args[0]) + FF_OUTPUT_SUFFIX

	168

	169 for i in range(FF_NUM_ITERATIONS):

	170 FindShardingFlakiness(args[0], data_path, FF_SUPERVISOR_ARGS)

	171 print 'That was just iteration %i of %i.' % (i + 1, FF_NUM_ITERATIONS)

	172 time.sleep(FF_SLEEP_INTERVAL)

	173

	174 FindUnaryFlakiness(

	175 args[0], output_path, FF_NUM_PROCS, FF_NUM_REPEATS, FF_TIMEOUT)

	176

	177

	178 if __name__ == '__main__':

	179 main()

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »