Chromium Code Reviews

Unified Diff: run_test_cases.py

Issue 12459014: Implement clustering support in run_test_cases.py. (Closed)
Base URL: svn://svn.chromium.org/chrome/trunk/tools/swarm_client
Patch Set: Fixes (created 7 years, 9 months ago)
 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 """Runs each test case as a single shard, single process execution.
 
 Similar to sharding_supervisor.py but finer grained. It runs each test case
 individually instead of running per shard. Runs multiple instances in parallel.
 """
(...skipping 652 matching lines...)
     # times in total.
     self.retries = retries
     self.decider = decider
     self.verbose = verbose
     self.add_task = add_task
     self.add_serial_task = add_serial_task
     # It is important to remove the shard environment variables since it could
     # conflict with --gtest_filter.
     self.env = setup_gtest_env()
 
-  def map(self, priority, test_case, try_count):
-    """Traces a single test case and returns its output.
+  def map(self, priority, test_cases, try_count):
+    """Runs a cluster of test cases and returns their output.
 
     try_count is 0 based, the original try is 0.
     """
     if self.decider.should_stop():
       return []
 
+    cmd = self.cmd + ['--gtest_filter=%s' % ':'.join(test_cases)]
+    if '--gtest_print_time' not in cmd:
+      cmd.append('--gtest_print_time')
     start = time.time()
     output, returncode = call_with_timeout(
-        self.cmd + ['--gtest_filter=%s' % test_case],
+        cmd,
         self.timeout,
         cwd=self.cwd_dir,
         stderr=subprocess.STDOUT,
         env=self.env)
     duration = time.time() - start
-    data = {
-      'test_case': test_case,
-      'returncode': returncode,
-      'duration': duration,
-      # It needs to be valid utf-8 otherwise it can't be store.
-      'output': output.decode('ascii', 'ignore').encode('utf-8'),
-    }
-    if '[ RUN      ]' not in output:
-      # Can't find gtest marker, mark it as invalid.
-      returncode = returncode or 1
-    self.decider.got_result(not bool(returncode))
+
+    # It needs to be valid utf-8 otherwise it can't be stored.
+    # TODO(maruel): Be more intelligent than decoding to ascii.
+    utf8_output = output.decode('ascii', 'ignore').encode('utf-8')
+
+    if len(test_cases) > 1:
+      data = process_output(utf8_output, test_cases, duration, returncode)
+    else:
+      if '[ RUN      ]' not in output:
+        # Can't find gtest marker, mark it as invalid.
+        returncode = returncode or 1
+      data = [
+        {
+          'test_case': test_cases[0],
+          'returncode': returncode,
+          'duration': duration,
+          'output': utf8_output,
+        }
+      ]
+
     if sys.platform == 'win32':
       output = output.replace('\r\n', '\n')
-    need_to_retry = returncode and try_count < self.retries
 
-    if try_count:
-      line = '%s (%.2fs) - retry #%d' % (test_case, duration, try_count)
-    else:
-      line = '%s (%.2fs)' % (test_case, duration)
-    if self.verbose or returncode or try_count > 0:
-      # Print output in one of three cases:
-      # --verbose was specified.
-      # The test failed.
-      # The wasn't the first attempt (this is needed so the test parser can
-      # detect that a test has been successfully retried).
-      line += '\n' + output
-    self.progress.update_item(line, True, need_to_retry)
+    for i in data:
+      self.decider.got_result(i['returncode'] == 0)

    M-A Ruel 2013/03/11 18:43:22: I got caught by the fact that i['returncode'] == N

+      need_to_retry = i['returncode'] != 0 and try_count < self.retries
+      if try_count:
+        line = '%s (%.2fs) - retry #%d' % (
+            i['test_case'], i['duration'] or 0, try_count)
+      else:
+        line = '%s (%.2fs)' % (i['test_case'], i['duration'] or 0)
+      if self.verbose or i['returncode'] != 0 or try_count > 0:
+        # Print output in one of three cases:
+        # --verbose was specified.
+        # The test failed.
+        # This wasn't the first attempt (this is needed so the test parser can
+        # detect that a test has been successfully retried).
+        line += '\n' + i['output']
+      self.progress.update_item(line, True, need_to_retry)
 
-    if need_to_retry:
-      if try_count + 1 < self.retries:
-        # The test failed and needs to be retried normally.
-        # Leave a buffer of ~40 test cases before retrying.
-        priority += 40
-        self.add_task(priority, self.map, priority, test_case, try_count + 1)
-      else:
-        # This test only has one retry left, so the final retry should be
-        # done serially.
-        self.add_serial_task(priority, self.map, priority, test_case,
-                             try_count + 1)
-
-    return [data]
+      if need_to_retry:
+        if try_count + 1 < self.retries:
+          # The test failed and needs to be retried normally.
+          # Leave a buffer of ~40 test cases before retrying.
+          priority += 40
+          self.add_task(
+              priority, self.map, priority, [i['test_case']], try_count + 1)
+        else:
+          # This test only has one retry left, so the final retry should be
+          # done serially.
+          self.add_serial_task(
+              priority, self.map, priority, [i['test_case']], try_count + 1)
+    return data
 
 
 def get_test_cases(cmd, cwd, whitelist, blacklist, index, shards, seed):
   """Returns the filtered list of test cases.
 
   This is done synchronously.
   """
   try:
     tests = list_test_cases(
         cmd,
(...skipping 202 matching lines...)
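Note on the clustered path in Runner.map() above: a cluster runs as a single child process with --gtest_filter=Case1:Case2:..., so its combined stdout must be split back into one result dict per test case. The process_output() helper that does this is not part of this file's diff, so the following is only a minimal, hypothetical sketch of such a splitter, keyed off gtest's standard '[ RUN      ]' and '[       OK ] / [  FAILED  ]' banners; split_clustered_output and its regexes are illustrative names, not the real implementation.

import re

# gtest result banners look like:
#   [ RUN      ] Suite.Test
#   [       OK ] Suite.Test (12 ms)
#   [  FAILED  ] Suite.Test (3 ms)
_RUN_RE = re.compile(r'^\[ RUN      \] (\S+)')
_DONE_RE = re.compile(r'^\[ +(OK|FAILED) +\] (\S+) \((\d+) ms\)')


def split_clustered_output(output, test_cases, returncode):
  # Hypothetical splitter; run_test_cases.py's process_output() may differ.
  results = {}
  current = None
  for line in output.splitlines():
    run = _RUN_RE.match(line)
    if run:
      current = run.group(1)
      results[current] = {
          'test_case': current,
          'returncode': 1,  # Assume failure until an OK banner is seen.
          'duration': None,
          'output': '',
      }
    if current:
      results[current]['output'] += line + '\n'
    done = _DONE_RE.match(line)
    if done and done.group(2) in results:
      results[done.group(2)]['returncode'] = (
          0 if done.group(1) == 'OK' else 1)
      results[done.group(2)]['duration'] = int(done.group(3)) / 1000.
      current = None
  # A test case with no '[ RUN      ]' banner never ran (e.g. an earlier
  # test crashed the process); report it as failed, like the single-test
  # path above does when the marker is missing.
  return [
      results.get(test_case, {
          'test_case': test_case,
          'returncode': returncode or 1,
          'duration': None,
          'output': '',
      })
      for test_case in test_cases
  ]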
 def run_test_cases(
     cmd, cwd, test_cases, jobs, timeout, clusters, retries, run_all,
     max_failures, no_cr, gtest_output, result_file, verbose):
   """Runs test cases in parallel.
 
   Arguments:
   - cmd: command to run.
   - cwd: working directory.
   - test_cases: list of preprocessed test cases to run.
   - jobs: number of parallel execution threads to use.
-  - timeout: individual test case timeout.
+  - timeout: individual test case timeout. Modulated when used with
+    clustering.
   - clusters: number of test cases to lump together in a single execution. 0
     means the default automatic value which depends on len(test_cases) and
     jobs. Capped to len(test_cases) / jobs.
   - retries: number of times a test case can be retried.
   - run_all: if True, do not return early even if all test cases fail.
   - max_failures: the absolute maximum number of tolerated failures, or None.
   - no_cr: makes output friendly to piped logs.
   - gtest_output: saves results as xml.
   - result_file: saves results as json.
   - verbose: print more details.
 
   It may run a subset of the test cases if too many test cases failed, as
   determined with max_failures, retries and run_all.
   """
   assert 0 <= retries <= 100000
   if not test_cases:
     return 0
   if run_all:
     decider = RunAll()
   else:
     # If 10% of test cases fail, just too bad.
     decider = RunSome(len(test_cases), retries, 2, 0.1, max_failures)
 
   if not clusters:
     clusters = calc_cluster_default(len(test_cases), jobs)
   else:
     # Limit the value.
     clusters = min(clusters, len(test_cases) / jobs)
 
+  logging.debug('%d test cases with clusters of %d', len(test_cases), clusters)
+
   if gtest_output:
     gtest_output = gen_gtest_output_dir(cwd, gtest_output)
   progress = Progress(len(test_cases))
   serial_tasks = QueueWithProgress(0)
   serial_tasks.set_progress(progress)
 
   def add_serial_task(priority, func, *args, **kwargs):
     """Adds a serial task, to be executed later."""
     assert isinstance(priority, int)
     assert callable(func)
     serial_tasks.put((priority, func, args, kwargs))
 
   with ThreadPool(progress, jobs, jobs, len(test_cases)) as pool:
     runner = Runner(
         cmd, cwd, timeout, progress, retries, decider, verbose,
         pool.add_task, add_serial_task)
     function = runner.map
-    logging.debug('Adding tests to ThreadPool')
     progress.use_cr_only = not no_cr
-    for i, test_case in enumerate(test_cases):
-      pool.add_task(i, function, i, test_case, 0)
-    logging.debug('All tests added to the ThreadPool')
+    # Cluster the test cases right away.
+    for i in xrange((len(test_cases) + clusters - 1) / clusters):
+      cluster = test_cases[i*clusters : (i+1)*clusters]
+      pool.add_task(i, function, i, cluster, 0)
     results = pool.join()
 
   # Retry any failed tests serially.
   if not serial_tasks.empty():
     progress.update_item('\n'.join(running_serial_warning()), index=False,
                          size=False)
 
   while not serial_tasks.empty():
     _priority, func, args, kwargs = serial_tasks.get()
     results.append(func(*args, **kwargs))
     serial_tasks.task_done()
 
   # Call join since that is a standard call once a queue has been emptied.
   serial_tasks.join()
 
   duration = time.time() - pool.tasks.progress.start
 
   cleaned = {}
-  for item in results:
-    if item:
-      cleaned.setdefault(item[0]['test_case'], []).extend(item)
+  for map_run in results:
+    for test_case_result in map_run:
+      cleaned.setdefault(test_case_result['test_case'], []).append(
+          test_case_result)
   results = cleaned
 
   # Total time taken to run each test case.
   test_case_duration = dict(
-      (test_case, sum(i.get('duration', 0) for i in item))
+      (test_case, sum((i.get('duration') or 0) for i in item))
       for test_case, item in results.iteritems())
 
   # Classify the results.
   success = []
   flaky = []
   fail = []
   nb_runs = 0
   for test_case in sorted(results):
     items = results[test_case]
     nb_runs += len(items)
-    if not any(not i['returncode'] for i in items):
+    if not any(i['returncode'] == 0 for i in items):
       fail.append(test_case)
-    elif len(items) > 1 and any(not i['returncode'] for i in items):
+    elif len(items) > 1 and any(i['returncode'] == 0 for i in items):
       flaky.append(test_case)
     elif len(items) == 1 and items[0]['returncode'] == 0:
       success.append(test_case)
     else:
+      # The test never ran.
       assert False, items
   missing = list(set(test_cases) - set(success) - set(flaky) - set(fail))
 
   saved = {
       'test_cases': results,
       'expected': len(test_cases),
       'success': success,
       'flaky': flaky,
       'fail': fail,
       'missing': missing,
(...skipping 303 matching lines...)
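The task-submission loop in the hunk above chunks test_cases into ceil(len(test_cases) / clusters) clusters using the usual (n + k - 1) / k integer trick; the script is Python 2, so '/' on ints already floors. A small worked example with hypothetical test names (written with '//' so it behaves the same under Python 3):

test_cases = ['Suite.a', 'Suite.b', 'Suite.c', 'Suite.d', 'Suite.e']
clusters = 2

# (5 + 2 - 1) // 2 == 3 chunks; '//' matches Python 2's integer '/' above.
chunks = [
    test_cases[i * clusters:(i + 1) * clusters]
    for i in range((len(test_cases) + clusters - 1) // clusters)
]
assert chunks == [['Suite.a', 'Suite.b'], ['Suite.c', 'Suite.d'], ['Suite.e']]

Each chunk is submitted with priority i, so clusters run roughly in listing order; per the retry code in Runner.map(), a failed test case is re-queued alone at priority + 40, leaving a buffer of about 40 tasks before the retry runs.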
         options.gtest_output,
         result_file,
         options.verbose)
   except Failure as e:
     print >> sys.stderr, e.args[0]
     return 1
 
 
 if __name__ == '__main__':
   sys.exit(main(sys.argv[1:]))
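For reference, the success / flaky / fail classification near the end of run_test_cases() can be restated as a standalone predicate. classify() below is only an illustrative rewrite of the diff's logic, not a function in the script:

def classify(items):
  # items: every result dict recorded for one test case, in run order.
  if not any(i['returncode'] == 0 for i in items):
    return 'fail'     # No run ever succeeded.
  if len(items) > 1:
    return 'flaky'    # Succeeded, but only after at least one retry.
  return 'success'    # Single run, returncode 0.


assert classify([{'returncode': 0}]) == 'success'
assert classify([{'returncode': 1}, {'returncode': 0}]) == 'flaky'
assert classify([{'returncode': 1}, {'returncode': 1}]) == 'fail'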