| OLD | NEW |
| 1 # Copyright 2013 The LUCI Authors. All rights reserved. | 1 # Copyright 2013 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. |
| 4 | 4 |
| 5 """Swarming bot main process. | 5 """Swarming bot main process. |
| 6 | 6 |
| 7 This is the program that communicates with the Swarming server, ensures the code | 7 This is the program that communicates with the Swarming server, ensures the code |
| 8 is always up to date and executes a child process to run tasks and upload | 8 is always up to date and executes a child process to run tasks and upload |
| 9 results back. | 9 results back. |
| 10 | 10 |
| 11 It manages self-update and rebooting the host in case of problems. | 11 It manages self-update and rebooting the host in case of problems. |
| 12 | 12 |
| 13 Set the environment variable SWARMING_LOAD_TEST=1 to disable the use of | 13 Set the environment variable SWARMING_LOAD_TEST=1 to disable the use of |
| 14 server-provided bot_config.py. This permits safe load testing. | 14 server-provided bot_config.py. This permits safe load testing. |
| 15 """ | 15 """ |
| 16 | 16 |
| 17 import contextlib | 17 import contextlib |
| 18 import json | 18 import json |
| 19 import logging | 19 import logging |
| 20 import optparse | 20 import optparse |
| 21 import os | 21 import os |
| 22 import shutil | 22 import shutil |
| 23 import signal | |
| 24 import sys | 23 import sys |
| 25 import tempfile | 24 import tempfile |
| 26 import threading | 25 import threading |
| 27 import time | 26 import time |
| 28 import traceback | 27 import traceback |
| 29 import zipfile | 28 import zipfile |
| 30 | 29 |
| 30 import bot_auth |
| 31 import common | 31 import common |
| 32 import file_refresher |
| 33 import remote_client |
| 32 import singleton | 34 import singleton |
| 33 from api import bot | 35 from api import bot |
| 34 from api import os_utilities | 36 from api import os_utilities |
| 35 from utils import file_path | 37 from utils import file_path |
| 36 from utils import net | 38 from utils import net |
| 37 from utils import on_error | 39 from utils import on_error |
| 38 from utils import subprocess42 | 40 from utils import subprocess42 |
| 39 from utils import zip_package | 41 from utils import zip_package |
| 40 | 42 |
| 41 | 43 |
| (...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 163 should_continue = bot_config.setup_bot(botobj) | 165 should_continue = bot_config.setup_bot(botobj) |
| 164 except Exception as e: | 166 except Exception as e: |
| 165 msg = '%s\n%s' % (e, traceback.format_exc()[-2048:]) | 167 msg = '%s\n%s' % (e, traceback.format_exc()[-2048:]) |
| 166 botobj.post_error('bot_config.setup_bot() threw: %s' % msg) | 168 botobj.post_error('bot_config.setup_bot() threw: %s' % msg) |
| 167 return | 169 return |
| 168 | 170 |
| 169 if not should_continue and not skip_reboot: | 171 if not should_continue and not skip_reboot: |
| 170 botobj.restart('Starting new swarming bot: %s' % THIS_FILE) | 172 botobj.restart('Starting new swarming bot: %s' % THIS_FILE) |
| 171 | 173 |
| 172 | 174 |
| 175 def get_authentication_headers(botobj): |
| 176 """Calls bot_config.get_authentication_headers() if it is defined. |
| 177 |
| 178 Doesn't catch exceptions. |
| 179 """ |
| 180 if _in_load_test_mode(): |
| 181 return (None, None) |
| 182 logging.info('get_authentication_headers()') |
| 183 from config import bot_config |
| 184 func = getattr(bot_config, 'get_authentication_headers', None) |
| 185 return func(botobj) if func else (None, None) |
| 186 |
| 187 |
| 173 ### end of bot_config handler part. | 188 ### end of bot_config handler part. |
| 174 | 189 |
| 175 | 190 |
| 176 def get_min_free_space(): | 191 def get_min_free_space(): |
| 177 """Returns free disk space needed. | 192 """Returns free disk space needed. |
| 178 | 193 |
| 179 Add a "250 MiB slack space" for logs, temporary files and whatever other leak. | 194 Add a "250 MiB slack space" for logs, temporary files and whatever other leak. |
| 180 """ | 195 """ |
| 181 return int((os_utilities.get_min_free_space(THIS_FILE) + 250.) * 1024 * 1024) | 196 return int((os_utilities.get_min_free_space(THIS_FILE) + 250.) * 1024 * 1024) |
| 182 | 197 |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 221 exception handler for incoming commands from the Swarming server. If for | 236 exception handler for incoming commands from the Swarming server. If for |
| 222 any reason the local test runner script can not be run successfully, | 237 any reason the local test runner script can not be run successfully, |
| 223 this function is invoked. | 238 this function is invoked. |
| 224 """ | 239 """ |
| 225 logging.error('Error: %s', error) | 240 logging.error('Error: %s', error) |
| 226 data = { | 241 data = { |
| 227 'id': botobj.id, | 242 'id': botobj.id, |
| 228 'message': error, | 243 'message': error, |
| 229 'task_id': task_id, | 244 'task_id': task_id, |
| 230 } | 245 } |
| 231 return net.url_read_json( | 246 return botobj.remote.url_read_json( |
| 232 botobj.server + '/swarming/api/v1/bot/task_error/%s' % task_id, data=data) | 247 '/swarming/api/v1/bot/task_error/%s' % task_id, data=data) |
| 233 | 248 |
| 234 | 249 |
| 235 def on_shutdown_hook(b): | 250 def on_shutdown_hook(b): |
| 236 """Called when the bot is restarting.""" | 251 """Called when the bot is restarting.""" |
| 237 call_hook(b, 'on_bot_shutdown') | 252 call_hook(b, 'on_bot_shutdown') |
| 238 # Aggressively set itself up so we ensure the auto-reboot configuration is | 253 # Aggressively set itself up so we ensure the auto-reboot configuration is |
| 239 # fine before restarting the host. This is important as some tasks delete the | 254 # fine before restarting the host. This is important as some tasks delete the |
| 240 # autorestart script (!) | 255 # autorestart script (!) |
| 241 setup_bot(True) | 256 setup_bot(True) |
| 242 | 257 |
| 243 | 258 |
| 244 def get_bot(): | 259 def get_bot(): |
| 245 """Returns a valid Bot instance. | 260 """Returns a valid Bot instance. |
| 246 | 261 |
| 247 Should only be called once in the process lifetime. | 262 Should only be called once in the process lifetime. |
| 248 """ | 263 """ |
| 249 # This variable is used to bootstrap the initial bot.Bot object, which then is | 264 # This variable is used to bootstrap the initial bot.Bot object, which then is |
| 250 # used to get the dimensions and state. | 265 # used to get the dimensions and state. |
| 251 attributes = { | 266 attributes = { |
| 252 'dimensions': {u'id': ['none']}, | 267 'dimensions': {u'id': ['none']}, |
| 253 'state': {}, | 268 'state': {}, |
| 254 'version': generate_version(), | 269 'version': generate_version(), |
| 255 } | 270 } |
| 256 config = get_config() | 271 config = get_config() |
| 257 assert not config['server'].endswith('/'), config | 272 assert not config['server'].endswith('/'), config |
| 258 | 273 |
| 259 # Create a temporary object to call the hooks. | 274 # Use temporary Bot object to call get_attributes. Attributes are needed to |
| 275 # construct the "real" bot.Bot. |
| 276 attributes = get_attributes( |
| 277 bot.Bot( |
| 278 remote_client.RemoteClient(config['server'], None), |
| 279 attributes, |
| 280 config['server'], |
| 281 config['server_version'], |
| 282 os.path.dirname(THIS_FILE), |
| 283 on_shutdown_hook)) |
| 284 |
| 285 # Make remote client callback use the returned bot object. We assume here |
| 286 # RemoteClient doesn't call its callback in the constructor (since 'botobj' is |
| 287 # undefined during the construction). |
| 260 botobj = bot.Bot( | 288 botobj = bot.Bot( |
| 289 remote_client.RemoteClient( |
| 290 config['server'], |
| 291 lambda: get_authentication_headers(botobj)), |
| 261 attributes, | 292 attributes, |
| 262 config['server'], | 293 config['server'], |
| 263 config['server_version'], | 294 config['server_version'], |
| 264 os.path.dirname(THIS_FILE), | 295 os.path.dirname(THIS_FILE), |
| 265 on_shutdown_hook) | 296 on_shutdown_hook) |
| 266 return bot.Bot( | 297 return botobj |
| 267 get_attributes(botobj), | |
| 268 config['server'], | |
| 269 config['server_version'], | |
| 270 os.path.dirname(THIS_FILE), | |
| 271 on_shutdown_hook) | |
| 272 | 298 |
| 273 | 299 |
| 274 def clean_isolated_cache(botobj): | 300 def clean_isolated_cache(botobj): |
| 275 """Asks run_isolated to clean its cache. | 301 """Asks run_isolated to clean its cache. |
| 276 | 302 |
| 277 This may take a while but it ensures that in the case of a run_isolated run | 303 This may take a while but it ensures that in the case of a run_isolated run |
| 278 failed and it temporarily used more space than min_free_disk, it can clean up | 304 failed and it temporarily used more space than min_free_disk, it can clean up |
| 279 the mess properly. | 305 the mess properly. |
| 280 | 306 |
| 281 It will remove unexpected files, remove corrupted files, trim the cache size | 307 It will remove unexpected files, remove corrupted files, trim the cache size |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 328 # First thing is to get an arbitrary url. This also ensures the network is | 354 # First thing is to get an arbitrary url. This also ensures the network is |
| 329 # up and running, which is necessary before trying to get the FQDN below. | 355 # up and running, which is necessary before trying to get the FQDN below. |
| 330 resp = net.url_read(config['server'] + '/swarming/api/v1/bot/server_ping') | 356 resp = net.url_read(config['server'] + '/swarming/api/v1/bot/server_ping') |
| 331 if resp is None: | 357 if resp is None: |
| 332 logging.error('No response from server_ping') | 358 logging.error('No response from server_ping') |
| 333 except Exception as e: | 359 except Exception as e: |
| 334 # url_read() already traps pretty much every exception. This except | 360 # url_read() already traps pretty much every exception. This except |
| 335 # clause is kept there "just in case". | 361 # clause is kept there "just in case". |
| 336 logging.exception('server_ping threw') | 362 logging.exception('server_ping threw') |
| 337 | 363 |
| 364 # Next we make sure the bot can make authenticated calls by grabbing |
| 365 # the auth headers, retrying on errors a bunch of times. We don't give up |
| 366 # if it fails though (maybe the bot will "fix itself" later). |
| 367 botobj = get_bot() |
| 368 try: |
| 369 botobj.remote.initialize(quit_bit) |
| 370 except remote_client.InitializationError as exc: |
| 371 botobj.post_error('failed to grab auth headers: %s' % exc.last_error) |
| 372 logging.error('Can\'t grab auth headers, continuing anyway...') |
| 373 |
| 338 if quit_bit.is_set(): | 374 if quit_bit.is_set(): |
| 339 logging.info('Early quit 1') | 375 logging.info('Early quit 1') |
| 340 return 0 | 376 return 0 |
| 341 | 377 |
| 342 # If this fails, there's hardly anything that can be done, the bot can't | 378 # If this fails, there's hardly anything that can be done, the bot can't |
| 343 # even get to the point to be able to self-update. | 379 # even get to the point to be able to self-update. |
| 344 botobj = get_bot() | 380 resp = botobj.remote.url_read_json( |
| 345 resp = net.url_read_json( | 381 '/swarming/api/v1/bot/handshake', data=botobj._attributes) |
| 346 botobj.server + '/swarming/api/v1/bot/handshake', | |
| 347 data=botobj._attributes) | |
| 348 if not resp: | 382 if not resp: |
| 349 logging.error('Failed to contact for handshake') | 383 logging.error('Failed to contact for handshake') |
| 350 else: | 384 else: |
| 351 logging.info('Connected to %s', resp.get('server_version')) | 385 logging.info('Connected to %s', resp.get('server_version')) |
| 352 if resp.get('bot_version') != botobj._attributes['version']: | 386 if resp.get('bot_version') != botobj._attributes['version']: |
| 353 logging.warning( | 387 logging.warning( |
| 354 'Found out we\'ll need to update: server said %s; we\'re %s', | 388 'Found out we\'ll need to update: server said %s; we\'re %s', |
| 355 resp.get('bot_version'), botobj._attributes['version']) | 389 resp.get('bot_version'), botobj._attributes['version']) |
| 356 | 390 |
| 357 if arg_error: | 391 if arg_error: |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 404 return 0 | 438 return 0 |
| 405 | 439 |
| 406 | 440 |
| 407 def poll_server(botobj, quit_bit): | 441 def poll_server(botobj, quit_bit): |
| 408 """Polls the server to run one loop. | 442 """Polls the server to run one loop. |
| 409 | 443 |
| 410 Returns True if executed some action, False if server asked the bot to sleep. | 444 Returns True if executed some action, False if server asked the bot to sleep. |
| 411 """ | 445 """ |
| 412 # Access to a protected member _XXX of a client class - pylint: disable=W0212 | 446 # Access to a protected member _XXX of a client class - pylint: disable=W0212 |
| 413 start = time.time() | 447 start = time.time() |
| 414 resp = net.url_read_json( | 448 resp = botobj.remote.url_read_json( |
| 415 botobj.server + '/swarming/api/v1/bot/poll', data=botobj._attributes) | 449 '/swarming/api/v1/bot/poll', data=botobj._attributes) |
| 416 if not resp: | 450 if not resp: |
| 417 return False | 451 return False |
| 418 logging.debug('Server response:\n%s', resp) | 452 logging.debug('Server response:\n%s', resp) |
| 419 | 453 |
| 420 cmd = resp['cmd'] | 454 cmd = resp['cmd'] |
| 421 if cmd == 'sleep': | 455 if cmd == 'sleep': |
| 422 quit_bit.wait(resp['duration']) | 456 quit_bit.wait(resp['duration']) |
| 423 return False | 457 return False |
| 424 | 458 |
| 425 if cmd == 'terminate': | 459 if cmd == 'terminate': |
| 426 quit_bit.set() | 460 quit_bit.set() |
| 427 # This is similar to post_update() in task_runner.py. | 461 # This is similar to post_update() in task_runner.py. |
| 428 params = { | 462 params = { |
| 429 'cost_usd': 0, | 463 'cost_usd': 0, |
| 430 'duration': 0, | 464 'duration': 0, |
| 431 'exit_code': 0, | 465 'exit_code': 0, |
| 432 'hard_timeout': False, | 466 'hard_timeout': False, |
| 433 'id': botobj.id, | 467 'id': botobj.id, |
| 434 'io_timeout': False, | 468 'io_timeout': False, |
| 435 'output': '', | 469 'output': '', |
| 436 'output_chunk_start': 0, | 470 'output_chunk_start': 0, |
| 437 'task_id': resp['task_id'], | 471 'task_id': resp['task_id'], |
| 438 } | 472 } |
| 439 net.url_read_json( | 473 botobj.remote.url_read_json( |
| 440 botobj.server + '/swarming/api/v1/bot/task_update/%s' % resp['task_id'], | 474 '/swarming/api/v1/bot/task_update/%s' % resp['task_id'], |
| 441 data=params) | 475 data=params) |
| 442 return False | 476 return False |
| 443 | 477 |
| 444 if cmd == 'run': | 478 if cmd == 'run': |
| 445 if run_manifest(botobj, resp['manifest'], start): | 479 if run_manifest(botobj, resp['manifest'], start): |
| 446 # Completed a task successfully so update swarming_bot.zip if necessary. | 480 # Completed a task successfully so update swarming_bot.zip if necessary. |
| 447 update_lkgbc(botobj) | 481 update_lkgbc(botobj) |
| 448 # Clean up cache after a task | 482 # Clean up cache after a task |
| 449 clean_isolated_cache(botobj) | 483 clean_isolated_cache(botobj) |
| 450 # TODO(maruel): Handle the case where quit_bit.is_set() happens here. This | 484 # TODO(maruel): Handle the case where quit_bit.is_set() happens here. This |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 489 if not manifest['command']: | 523 if not manifest['command']: |
| 490 hard_timeout += manifest['io_timeout'] or 600 | 524 hard_timeout += manifest['io_timeout'] or 600 |
| 491 | 525 |
| 492 url = manifest.get('host', botobj.server) | 526 url = manifest.get('host', botobj.server) |
| 493 task_dimensions = manifest['dimensions'] | 527 task_dimensions = manifest['dimensions'] |
| 494 task_result = {} | 528 task_result = {} |
| 495 | 529 |
| 496 failure = False | 530 failure = False |
| 497 internal_failure = False | 531 internal_failure = False |
| 498 msg = None | 532 msg = None |
| 533 auth_params_dumper = None |
| 499 work_dir = os.path.join(botobj.base_dir, 'work') | 534 work_dir = os.path.join(botobj.base_dir, 'work') |
| 500 try: | 535 try: |
| 501 try: | 536 try: |
| 502 if os.path.isdir(work_dir): | 537 if os.path.isdir(work_dir): |
| 503 file_path.rmtree(work_dir) | 538 file_path.rmtree(work_dir) |
| 504 except OSError: | 539 except OSError: |
| 505 # If a previous task created an undeletable file/directory inside 'work', | 540 # If a previous task created an undeletable file/directory inside 'work', |
| 506 # make sure that following tasks are not affected. This is done by working | 541 # make sure that following tasks are not affected. This is done by working |
| 507 # around the undeletable directory by creating a temporary directory | 542 # around the undeletable directory by creating a temporary directory |
| 508 # instead. This is not normal behavior. The bot will report a failure on | 543 # instead. This is not normal behavior. The bot will report a failure on |
| 509 # start. | 544 # start. |
| 510 work_dir = tempfile.mkdtemp(dir=botobj.base_dir, prefix='work') | 545 work_dir = tempfile.mkdtemp(dir=botobj.base_dir, prefix='work') |
| 511 else: | 546 else: |
| 512 os.makedirs(work_dir) | 547 os.makedirs(work_dir) |
| 513 | 548 |
| 514 env = os.environ.copy() | 549 env = os.environ.copy() |
| 515 # Windows in particular does not tolerate unicode strings in environment | 550 # Windows in particular does not tolerate unicode strings in environment |
| 516 # variables. | 551 # variables. |
| 517 env['SWARMING_TASK_ID'] = task_id.encode('ascii') | 552 env['SWARMING_TASK_ID'] = task_id.encode('ascii') |
| 518 | 553 |
| 519 task_in_file = os.path.join(work_dir, 'task_runner_in.json') | 554 task_in_file = os.path.join(work_dir, 'task_runner_in.json') |
| 520 with open(task_in_file, 'wb') as f: | 555 with open(task_in_file, 'wb') as f: |
| 521 f.write(json.dumps(manifest)) | 556 f.write(json.dumps(manifest)) |
| 522 call_hook(botobj, 'on_before_task') | 557 call_hook(botobj, 'on_before_task') |
| 523 task_result_file = os.path.join(work_dir, 'task_runner_out.json') | 558 task_result_file = os.path.join(work_dir, 'task_runner_out.json') |
| 524 if os.path.exists(task_result_file): | 559 if os.path.exists(task_result_file): |
| 525 os.remove(task_result_file) | 560 os.remove(task_result_file) |
| 561 |
| 562 # Start a thread that periodically puts authentication headers and other |
| 563 # authentication related information to a file on disk. task_runner and its |
| 564 # subprocesses read it from there before making authenticated HTTP calls. |
| 565 auth_params_file = os.path.join(work_dir, 'bot_auth_params.json') |
| 566 if botobj.remote.uses_auth: |
| 567 env['SWARMING_AUTH_PARAMS'] = str(auth_params_file) |
| 568 auth_params_dumper = file_refresher.FileRefresherThread( |
| 569 auth_params_file, lambda: bot_auth.prepare_auth_params_json(botobj)) |
| 570 auth_params_dumper.start() |
| 571 else: |
| 572 env.pop('SWARMING_AUTH_PARAMS', None) |
| 573 if os.path.exists(auth_params_file): |
| 574 os.remove(auth_params_file) |
| 575 |
| 526 command = [ | 576 command = [ |
| 527 sys.executable, THIS_FILE, 'task_runner', | 577 sys.executable, THIS_FILE, 'task_runner', |
| 528 '--swarming-server', url, | 578 '--swarming-server', url, |
| 529 '--in-file', task_in_file, | 579 '--in-file', task_in_file, |
| 530 '--out-file', task_result_file, | 580 '--out-file', task_result_file, |
| 531 '--cost-usd-hour', str(botobj.state.get('cost_usd_hour') or 0.), | 581 '--cost-usd-hour', str(botobj.state.get('cost_usd_hour') or 0.), |
| 532 # Include the time taken to poll the task in the cost. | 582 # Include the time taken to poll the task in the cost. |
| 533 '--start', str(start), | 583 '--start', str(start), |
| 534 '--min-free-space', str(get_min_free_space()), | 584 '--min-free-space', str(get_min_free_space()), |
| 535 ] | 585 ] |
| 536 logging.debug('Running command: %s', command) | 586 logging.debug('Running command: %s', command) |
| 587 |
| 537 # Put the output file into the current working directory, which should be | 588 # Put the output file into the current working directory, which should be |
| 538 # the one containing swarming_bot.zip. | 589 # the one containing swarming_bot.zip. |
| 539 log_path = os.path.join(botobj.base_dir, 'logs', 'task_runner_stdout.log') | 590 log_path = os.path.join(botobj.base_dir, 'logs', 'task_runner_stdout.log') |
| 540 os_utilities.roll_log(log_path) | 591 os_utilities.roll_log(log_path) |
| 541 os_utilities.trim_rolled_log(log_path) | 592 os_utilities.trim_rolled_log(log_path) |
| 542 with open(log_path, 'a+b') as f: | 593 with open(log_path, 'a+b') as f: |
| 543 proc = subprocess42.Popen( | 594 proc = subprocess42.Popen( |
| 544 command, | 595 command, |
| 545 detached=True, | 596 detached=True, |
| 546 cwd=botobj.base_dir, | 597 cwd=botobj.base_dir, |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 587 failure = bool(task_result.get('exit_code')) if task_result else False | 638 failure = bool(task_result.get('exit_code')) if task_result else False |
| 588 return not internal_failure and not failure | 639 return not internal_failure and not failure |
| 589 except Exception as e: | 640 except Exception as e: |
| 590 # Failures include IOError when writing if the disk is full, OSError if | 641 # Failures include IOError when writing if the disk is full, OSError if |
| 591 # swarming_bot.zip doesn't exist anymore, etc. | 642 # swarming_bot.zip doesn't exist anymore, etc. |
| 592 logging.exception('run_manifest failed') | 643 logging.exception('run_manifest failed') |
| 593 msg = 'Internal exception occured: %s\n%s' % ( | 644 msg = 'Internal exception occured: %s\n%s' % ( |
| 594 e, traceback.format_exc()[-2048:]) | 645 e, traceback.format_exc()[-2048:]) |
| 595 internal_failure = True | 646 internal_failure = True |
| 596 finally: | 647 finally: |
| 648 if auth_params_dumper: |
| 649 auth_params_dumper.stop() |
| 597 if internal_failure: | 650 if internal_failure: |
| 598 post_error_task(botobj, msg, task_id) | 651 post_error_task(botobj, msg, task_id) |
| 599 call_hook( | 652 call_hook( |
| 600 botobj, 'on_after_task', failure, internal_failure, task_dimensions, | 653 botobj, 'on_after_task', failure, internal_failure, task_dimensions, |
| 601 task_result) | 654 task_result) |
| 602 if os.path.isdir(work_dir): | 655 if os.path.isdir(work_dir): |
| 603 try: | 656 try: |
| 604 file_path.rmtree(work_dir) | 657 file_path.rmtree(work_dir) |
| 605 except Exception as e: | 658 except Exception as e: |
| 606 botobj.post_error( | 659 botobj.post_error( |
| (...skipping 10 matching lines...) Expand all Loading... |
| 617 | 670 |
| 618 Does not return. | 671 Does not return. |
| 619 """ | 672 """ |
| 620 # Alternate between .1.zip and .2.zip. | 673 # Alternate between .1.zip and .2.zip. |
| 621 new_zip = 'swarming_bot.1.zip' | 674 new_zip = 'swarming_bot.1.zip' |
| 622 if os.path.basename(THIS_FILE) == new_zip: | 675 if os.path.basename(THIS_FILE) == new_zip: |
| 623 new_zip = 'swarming_bot.2.zip' | 676 new_zip = 'swarming_bot.2.zip' |
| 624 new_zip = os.path.join(os.path.dirname(THIS_FILE), new_zip) | 677 new_zip = os.path.join(os.path.dirname(THIS_FILE), new_zip) |
| 625 | 678 |
| 626 # Download as a new file. | 679 # Download as a new file. |
| 627 url = botobj.server + '/swarming/api/v1/bot/bot_code/%s' % version | 680 url_path = '/swarming/api/v1/bot/bot_code/%s' % version |
| 628 if not net.url_retrieve(new_zip, url): | 681 if not botobj.remote.url_retrieve(new_zip, url_path): |
| 629 # It can happen when a server is rapidly updated multiple times in a row. | 682 # It can happen when a server is rapidly updated multiple times in a row. |
| 630 botobj.post_error( | 683 botobj.post_error( |
| 631 'Unable to download %s from %s; first tried version %s' % | 684 'Unable to download %s from %s; first tried version %s' % |
| 632 (new_zip, url, version)) | 685 (new_zip, botobj.server + url_path, version)) |
| 633 # Poll again, this may work next time. To prevent busy-loop, sleep a little. | 686 # Poll again, this may work next time. To prevent busy-loop, sleep a little. |
| 634 time.sleep(2) | 687 time.sleep(2) |
| 635 return | 688 return |
| 636 | 689 |
| 637 s = os.stat(new_zip) | 690 s = os.stat(new_zip) |
| 638 logging.info('Restarting to %s; %d bytes.', new_zip, s.st_size) | 691 logging.info('Restarting to %s; %d bytes.', new_zip, s.st_size) |
| 639 sys.stdout.flush() | 692 sys.stdout.flush() |
| 640 sys.stderr.flush() | 693 sys.stderr.flush() |
| 641 | 694 |
| 642 proc = subprocess42.Popen( | 695 proc = subprocess42.Popen( |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 737 os.path.dirname(THIS_FILE), 'logs', 'bot_std%s.log' % t) | 790 os.path.dirname(THIS_FILE), 'logs', 'bot_std%s.log' % t) |
| 738 os_utilities.roll_log(log_path) | 791 os_utilities.roll_log(log_path) |
| 739 os_utilities.trim_rolled_log(log_path) | 792 os_utilities.trim_rolled_log(log_path) |
| 740 | 793 |
| 741 error = None | 794 error = None |
| 742 if len(args) != 0: | 795 if len(args) != 0: |
| 743 error = 'Unexpected arguments: %s' % args | 796 error = 'Unexpected arguments: %s' % args |
| 744 try: | 797 try: |
| 745 return run_bot(error) | 798 return run_bot(error) |
| 746 finally: | 799 finally: |
| 747 call_hook(bot.Bot(None, None, None, os.path.dirname(THIS_FILE), None), | 800 call_hook(bot.Bot(None, None, None, None, os.path.dirname(THIS_FILE), None), |
| 748 'on_bot_shutdown') | 801 'on_bot_shutdown') |
| 749 logging.info('main() returning') | 802 logging.info('main() returning') |
| OLD | NEW |