Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Triggers a ton of fake jobs to test its handling under high load. | |
| 7 | |
| 8 Generates an histogram with the latencies to process the tasks and number of | |
| 9 retries. | |
| 10 """ | |
| 11 | |
| 12 import json | |
| 13 import logging | |
| 14 import optparse | |
| 15 import os | |
| 16 import random | |
| 17 import socket | |
| 18 import string | |
| 19 import sys | |
| 20 import time | |
| 21 | |
| 22 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | |
| 23 | |
| 24 sys.path.insert(0, ROOT_DIR) | |
| 25 | |
| 26 from third_party import colorama | |
| 27 | |
| 28 import swarming | |
| 29 | |
| 30 from utils import graph | |
| 31 from utils import net | |
| 32 from utils import threading_utils | |
| 33 | |
| 34 | |
def print_results(results, columns, buckets):
  """Prints a latency histogram and a summary of a load-test run.

  Arguments:
  - results: list of per-task outcomes; float entries are latencies in
    seconds, any non-float entry is treated as a failure marker (e.g. the
    short error strings returned by trigger_task).
  - columns: console width to use when rendering the histogram.
  - buckets: number of buckets in the histogram.
  """
  delays = [i for i in results if isinstance(i, float)]
  failures = [i for i in results if not isinstance(i, float)]

  graph.print_histogram(
      graph.generate_histogram(delays, buckets), columns, '%5.3f')
  print('')
  print('Total items : %d' % len(results))
  # Guard against division by zero when every task failed.
  average = 0
  if delays:
    average = sum(delays) / len(delays)
  print('Average delay: %s' % graph.to_units(average))
  print('')
  if failures:
    print('')
    print('%sFAILURES%s:' % (colorama.Fore.RED, colorama.Fore.RESET))
    print('\n'.join('  %s' % i for i in failures))
| 53 | |
| 54 | |
def trigger_task(swarming_url, progress, unique, timeout, index):
  """Runs one fake Swarming task end-to-end and measures its latency.

  Triggers the task on the server, then polls until its result is available,
  validating both the trigger reply and the collected output along the way.

  Returns:
    The elapsed wall-clock time in seconds (float) for the full round trip,
    including all overhead, or a short failure-category string on error.
  """
  name = 'load-test-%d-%s' % (index, unique)
  started = time.time()

  logging.info('trigger')
  manifest = swarming.Manifest(
      None, name, 1, None, 'AIX', '', 'http://localhost:1', False, False, 100,
      None)
  response = net.url_open(
      swarming_url + '/test', data={'request': manifest.to_json()})
  if not response:
    # The server never acknowledged the trigger; report it as a failure.
    return 'failed_trigger'

  actual = json.load(response)
  test_key = actual['test_keys'][0].pop('test_key')
  assert test_key
  # Everything but the (unpredictable) test_key must match exactly.
  assert actual == {
    'test_case_name': name,
    'test_keys': [
      {
        'config_name': 'AIX',
        'num_instances': 1,
        'instance_index': 0,
      },
    ],
  }, actual

  progress.update_item('%5d' % index, processing=1)
  try:
    logging.info('collect')
    test_keys = swarming.get_test_keys(swarming_url, name)
    if not test_keys:
      return 'no_test_keys'
    assert test_keys == [test_key], test_keys
    outputs = [
      output
      for _, output in swarming.yield_results(
          swarming_url, test_keys, timeout, None)
    ]
    if not outputs:
      return 'no_result'
    # The bot runs on this very machine, so its identifiers embed the local
    # FQDN; strip them before comparing the rest of the payload.
    fqdn = socket.getfqdn().lower()
    assert outputs[0].pop('machine_tag').startswith(fqdn)
    assert outputs[0].pop('machine_id').startswith(fqdn)
    expected = [
      {
        u'config_instance_index': 0,
        u'exit_codes': u'0',
        u'num_config_instances': 1,
        u'output': u'This task ran with great success',
      },
    ]
    assert outputs == expected, '\n%s\n%s' % (outputs, expected)
    return time.time() - started
  finally:
    progress.update_item('%5d - done' % index, processing=-1, processed=1)
| 116 | |
| 117 | |
def main():
  """Parses the command line, generates the load and reports the results."""
  colorama.init()
  parser = optparse.OptionParser(description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-S', '--swarming',
      metavar='URL', default='',
      help='Swarming server to use')

  group = optparse.OptionGroup(parser, 'Load generated')
  group.add_option(
      '-s', '--send-rate', type='float', default=16., metavar='RATE',
      help='Rate (item/s) of sending requests as a float, default: %default')
  group.add_option(
      '-d', '--duration', type='float', default=60., metavar='N',
      help='Duration (s) of the sending phase of the load test, '
           'default: %default')
  group.add_option(
      '-m', '--concurrent', type='int', default=200, metavar='N',
      help='Maximum concurrent on-going requests, default: %default')
  group.add_option(
      '-t', '--timeout', type='float', default=3600., metavar='N',
      help='Timeout to get results, default: %default')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Display options')
  group.add_option(
      '--columns', type='int', default=graph.get_console_width(), metavar='N',
      help='For histogram display, default:%default')
  group.add_option(
      '--buckets', type='int', default=20, metavar='N',
      help='Number of buckets for histogram display, default:%default')
  parser.add_option_group(group)

  parser.add_option('--dump', metavar='FOO.JSON', help='Dumps to json file')
  parser.add_option(
      '-v', '--verbose', action='store_true', help='Enables logging')

  options, args = parser.parse_args()
  logging.basicConfig(level=logging.INFO if options.verbose else logging.FATAL)
  if args:
    parser.error('Unsupported args: %s' % args)
  options.swarming = options.swarming.rstrip('/')
  if not options.swarming:
    parser.error('--swarming is required.')
  if options.duration <= 0:
    parser.error('Needs --duration > 0. 0.01 is a valid value.')

  total = options.send_rate * options.duration
  print(
      'Sending %.1f i/s for %ds with max %d parallel requests; timeout %.1fs; '
      'total %d' %
      (options.send_rate, options.duration, options.concurrent,
       options.timeout, total))
  print('[processing/processed/todo]')

  # A random suffix keeps these runs from clashing with real usage of the
  # same server.
  unique = ''.join(random.choice(string.ascii_letters) for _ in range(8))
  progress = threading_utils.Progress(
      [('processing', 0), ('processed', 0), ('todo', 0)])
  triggered = 0
  with threading_utils.ThreadPoolWithProgress(
      progress, 1, options.concurrent, 0) as pool:
    try:
      start = time.time()
      while True:
        elapsed = time.time() - start
        if elapsed > options.duration:
          break
        # Catch up with how many tasks the target rate says should have been
        # sent by now.
        while triggered < int(elapsed * options.send_rate):
          pool.add_task(
              0,
              trigger_task,
              options.swarming,
              progress,
              unique,
              options.timeout,
              triggered)
          progress.update_item('', todo=1)
          triggered += 1
        progress.print_update()
        time.sleep(0.01)
    except KeyboardInterrupt:
      aborted = pool.abort()
      progress.update_item(
          'Got Ctrl-C. Aborted %d unsent tasks.' % aborted,
          raw=True,
          todo=-aborted)
      progress.print_update()
    finally:
      # TODO(maruel): We could give up on collecting results for the on-going
      # tasks but that would need to be optional.
      progress.update_item('Getting results for on-going tasks.', raw=True)
      results = sorted(pool.join())
      progress.print_update()
  # At this point, progress is not used anymore.
  print('')
  print(' - Took %.1fs.' % (time.time() - start))
  print('')
  print_results(results, options.columns, options.buckets)
  if options.dump:
    with open(options.dump, 'w') as f:
      json.dump(results, f, separators=(',',':'))
  return 0
| 223 | |
| 224 | |
| 225 if __name__ == '__main__': | |
| 226 sys.exit(main()) | |
| OLD | NEW |