Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Triggers a ton of fake jobs to test its handling under high load. | |
| 7 | |
| 8 Generates an histogram with the latencies to process the tasks and number of | |
| 9 retries. | |
| 10 """ | |
| 11 | |
| 12 import json | |
| 13 import logging | |
| 14 import optparse | |
| 15 import os | |
| 16 import random | |
| 17 import socket | |
| 18 import string | |
| 19 import sys | |
| 20 import time | |
| 21 | |
| 22 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
| 23 | |
| 24 sys.path.insert(0, ROOT_DIR) | |
| 25 | |
| 26 from third_party import colorama | |
| 27 | |
| 28 import swarming | |
| 29 | |
| 30 from utils import graph | |
| 31 from utils import net | |
| 32 from utils import threading_utils | |
| 33 | |
| 34 | |
def print_results(results, columns, buckets):
  """Prints a latency histogram and a summary of a load-test run.

  Arguments:
  - results: list of per-task outcomes; float entries are latencies in
    seconds, any non-float entry is treated as a failure marker (e.g. the
    short error strings returned by trigger_task).
  - columns: console width to use when rendering the histogram.
  - buckets: number of buckets in the histogram.
  """
  delays = [i for i in results if isinstance(i, float)]
  failures = [i for i in results if not isinstance(i, float)]

  graph.print_histogram(
      graph.generate_histogram(delays, buckets), columns, '%5.3f')
  print('')
  print('Total items : %d' % len(results))
  # Guard against division by zero when every task failed.
  average = 0
  if delays:
    average = sum(delays) / len(delays)
  print('Average delay: %s' % graph.to_units(average))
  print('')
  if failures:
    print('')
    print('%sFAILURES%s:' % (colorama.Fore.RED, colorama.Fore.RESET))
    print('\n'.join('  %s' % i for i in failures))
| 53 | |
| 54 | |
def trigger_task(swarming_url, progress, unique, timeout, index):
  """Runs one fake Swarming task end-to-end and measures its latency.

  Triggers the task on the server, then polls until its result is available,
  validating both the trigger reply and the collected output along the way.

  Returns:
    The elapsed wall-clock time in seconds (float) for the full round trip,
    including all overhead, or a short failure-category string on error.
  """
  name = 'load-test-%d-%s' % (index, unique)
  started = time.time()

  logging.info('trigger')
  manifest = swarming.Manifest(
      None, name, 1, None, 'AIX', '', 'http://localhost:1', False, False, 100,
      None)
  response = net.url_open(
      swarming_url + '/test', data={'request': manifest.to_json()})
  if not response:
    # The server never acknowledged the trigger; report it as a failure.
    return 'failed_trigger'

  actual = json.load(response)
  test_key = actual['test_keys'][0].pop('test_key')
  assert test_key
  # Everything but the (unpredictable) test_key must match exactly.
  assert actual == {
    'test_case_name': name,
    'test_keys': [
      {
        'config_name': 'AIX',
        'num_instances': 1,
        'instance_index': 0,
      },
    ],
  }, actual

  progress.update_item('%5d' % index, processing=1)
  try:
    logging.info('collect')
    test_keys = swarming.get_test_keys(swarming_url, name)
    if not test_keys:
      return 'no_test_keys'
    assert test_keys == [test_key], test_keys
    outputs = [
      output
      for _, output in swarming.yield_results(
          swarming_url, test_keys, timeout, None)
    ]
    if not outputs:
      return 'no_result'
    # The bot runs on this very machine, so its identifiers embed the local
    # FQDN; strip them before comparing the rest of the payload.
    fqdn = socket.getfqdn().lower()
    assert outputs[0].pop('machine_tag').startswith(fqdn)
    assert outputs[0].pop('machine_id').startswith(fqdn)
    expected = [
      {
        u'config_instance_index': 0,
        u'exit_codes': u'0',
        u'num_config_instances': 1,
        u'output': u'This task ran with great success',
      },
    ]
    assert outputs == expected, '\n%s\n%s' % (outputs, expected)
    return time.time() - started
  finally:
    progress.update_item('%5d - done' % index, processing=-1, processed=1)
| 116 | |
| 117 | |
def main():
  """Parses the command line, generates the load and reports the results."""
  colorama.init()
  parser = optparse.OptionParser(description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-S', '--swarming',
      metavar='URL', default='',
      help='Swarming server to use')

  group = optparse.OptionGroup(parser, 'Load generated')
  group.add_option(
      '-s', '--send-rate', type='float', default=16., metavar='RATE',
      help='Rate (item/s) of sending requests as a float, default: %default')
  group.add_option(
      '-d', '--duration', type='float', default=60., metavar='N',
      help='Duration (s) of the sending phase of the load test, '
           'default: %default')
  group.add_option(
      '-m', '--concurrent', type='int', default=200, metavar='N',
      help='Maximum concurrent on-going requests, default: %default')
  group.add_option(
      '-t', '--timeout', type='float', default=3600., metavar='N',
      help='Timeout to get results, default: %default')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Display options')
  group.add_option(
      '--columns', type='int', default=graph.get_console_width(), metavar='N',
      help='For histogram display, default:%default')
  group.add_option(
      '--buckets', type='int', default=20, metavar='N',
      help='Number of buckets for histogram display, default:%default')
  parser.add_option_group(group)

  parser.add_option('--dump', metavar='FOO.JSON', help='Dumps to json file')
  parser.add_option(
      '-v', '--verbose', action='store_true', help='Enables logging')

  options, args = parser.parse_args()
  logging.basicConfig(level=logging.INFO if options.verbose else logging.FATAL)
  if args:
    parser.error('Unsupported args: %s' % args)
  options.swarming = options.swarming.rstrip('/')
  if not options.swarming:
    parser.error('--swarming is required.')
  if options.duration <= 0:
    parser.error('Needs --duration > 0. 0.01 is a valid value.')

  total = options.send_rate * options.duration
  print(
      'Sending %.1f i/s for %ds with max %d parallel requests; timeout %.1fs; '
      'total %d' %
      (options.send_rate, options.duration, options.concurrent,
       options.timeout, total))
  print('[processing/processed/todo]')

  # A random suffix keeps these runs from clashing with real usage of the
  # same server.
  unique = ''.join(random.choice(string.ascii_letters) for _ in range(8))
  progress = threading_utils.Progress(
      [('processing', 0), ('processed', 0), ('todo', 0)])
  triggered = 0
  with threading_utils.ThreadPoolWithProgress(
      progress, 1, options.concurrent, 0) as pool:
    try:
      start = time.time()
      while True:
        elapsed = time.time() - start
        if elapsed > options.duration:
          break
        # Catch up with how many tasks the target rate says should have been
        # sent by now.
        while triggered < int(elapsed * options.send_rate):
          pool.add_task(
              0,
              trigger_task,
              options.swarming,
              progress,
              unique,
              options.timeout,
              triggered)
          progress.update_item('', todo=1)
          triggered += 1
        progress.print_update()
        time.sleep(0.01)
    except KeyboardInterrupt:
      aborted = pool.abort()
      progress.update_item(
          'Got Ctrl-C. Aborted %d unsent tasks.' % aborted,
          raw=True,
          todo=-aborted)
      progress.print_update()
    finally:
      # TODO(maruel): We could give up on collecting results for the on-going
      # tasks but that would need to be optional.
      progress.update_item('Getting results for on-going tasks.', raw=True)
      results = sorted(pool.join())
      progress.print_update()
  # At this point, progress is not used anymore.
  print('')
  print(' - Took %.1fs.' % (time.time() - start))
  print('')
  print_results(results, options.columns, options.buckets)
  if options.dump:
    with open(options.dump, 'w') as f:
      json.dump(results, f, separators=(',',':'))
  return 0
| 223 | |
| 224 | |
| 225 if __name__ == '__main__': | |
| 226 sys.exit(main()) | |
| OLD | NEW |