Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(104)

Side by Side Diff: appengine/swarming/handlers_bot.py

Issue 2689483004: swarming: Add server-side implementation for supplemental bot_config (Closed)
Patch Set: Rebased Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/swarming/handlers_bot_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2015 The LUCI Authors. All rights reserved. 1 # Copyright 2015 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0 2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file. 3 # that can be found in the LICENSE file.
4 4
5 """Internal bot API handlers.""" 5 """Internal bot API handlers."""
6 6
7 import base64 7 import base64
8 import json 8 import json
9 import logging 9 import logging
10 import re 10 import re
(...skipping 419 matching lines...) Expand 10 before | Expand all | Expand 10 after
430 430
431 data = { 431 data = {
432 'bot_version': bot_code.get_bot_version(self.get_bot_contact_server())[0], 432 'bot_version': bot_code.get_bot_version(self.get_bot_contact_server())[0],
433 'server_version': utils.get_app_version(), 433 'server_version': utils.get_app_version(),
434 'bot_group_cfg_version': res.bot_group_cfg.version, 434 'bot_group_cfg_version': res.bot_group_cfg.version,
435 'bot_group_cfg': { 435 'bot_group_cfg': {
436 # Let the bot know its server-side dimensions (from bots.cfg file). 436 # Let the bot know its server-side dimensions (from bots.cfg file).
437 'dimensions': res.bot_group_cfg.dimensions, 437 'dimensions': res.bot_group_cfg.dimensions,
438 }, 438 },
439 } 439 }
440 if res.bot_group_cfg.bot_config_script_content:
441 logging.info(
442 'Injecting %s: %d bytes',
443 res.bot_group_cfg.bot_config_script,
444 len(res.bot_group_cfg.bot_config_script_content))
445 data['bot_config'] = res.bot_group_cfg.bot_config_script_content
440 self.send_response(data) 446 self.send_response(data)
441 447
442 448
443 class BotPollHandler(_BotBaseHandler): 449 class BotPollHandler(_BotBaseHandler):
444 """The bot polls for a task; returns either a task, update command or sleep. 450 """The bot polls for a task; returns either a task, update command or sleep.
445 451
446 In case of exception on the bot, this is enough to get it just far enough to 452 In case of exception on the bot, this is enough to get it just far enough to
447 eventually self-update to a working version. This is to ensure that coding 453 eventually self-update to a working version. This is to ensure that coding
448 errors in bot code don't kill all the fleet at once, they should still be up 454 errors in bot code don't kill all the fleet at once, they should still be up
449 just enough to be able to self-update again even if they don't get task 455 just enough to be able to self-update again even if they don't get task
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
498 self._cmd_sleep(sleep_streak, quarantined) 504 self._cmd_sleep(sleep_streak, quarantined)
499 return 505 return
500 506
501 # If the server-side per-bot config for the bot has changed, we need 507 # If the server-side per-bot config for the bot has changed, we need
502 # to restart this particular bot, so it picks up new config in /handshake. 508 # to restart this particular bot, so it picks up new config in /handshake.
503 # Do this check only for bots that know about server-side per-bot configs 509 # Do this check only for bots that know about server-side per-bot configs
504 # already (such bots send 'bot_group_cfg_version' state attribute). 510 # already (such bots send 'bot_group_cfg_version' state attribute).
505 cur_bot_cfg_ver = res.state.get('bot_group_cfg_version') 511 cur_bot_cfg_ver = res.state.get('bot_group_cfg_version')
506 if cur_bot_cfg_ver and cur_bot_cfg_ver != res.bot_group_cfg.version: 512 if cur_bot_cfg_ver and cur_bot_cfg_ver != res.bot_group_cfg.version:
507 bot_event('request_restart') 513 bot_event('request_restart')
508 self._cmd_restart('Restarting to pick up new bots.cfg config') 514 self._cmd_bot_restart('Restarting to pick up new bots.cfg config')
509 return 515 return
510 516
511 # 517 #
512 # At that point, the bot should be in relatively good shape since it's 518 # At that point, the bot should be in relatively good shape since it's
513 # running the right version. It is still possible that invalid code was 519 # running the right version. It is still possible that invalid code was
514 # pushed to the server, so be diligent about it. 520 # pushed to the server, so be diligent about it.
515 # 521 #
516 522
523 # TODO(maruel): Remove this and migrate all use cases to bot_config.py's
524 # on_bot_idle().
517 # Bot may need a reboot if it is running for too long. We do not reboot 525 # Bot may need a reboot if it is running for too long. We do not reboot
518 # quarantined bots. 526 # quarantined bots.
519 needs_restart, restart_message = bot_management.should_restart_bot( 527 needs_restart, restart_message = bot_management.should_restart_bot(
520 res.bot_id, res.state) 528 res.bot_id, res.state)
521 if needs_restart: 529 if needs_restart:
522 bot_event('request_restart') 530 bot_event('request_restart')
523 self._cmd_restart(restart_message) 531 self._cmd_host_reboot(restart_message)
524 return 532 return
525 533
526 # The bot is in good shape. Try to grab a task. 534 # The bot is in good shape. Try to grab a task.
527 try: 535 try:
528 # This is a fairly complex function call, exceptions are expected. 536 # This is a fairly complex function call, exceptions are expected.
529 request, secret_bytes, run_result = task_scheduler.bot_reap_task( 537 request, secret_bytes, run_result = task_scheduler.bot_reap_task(
530 res.dimensions, res.bot_id, res.version, 538 res.dimensions, res.bot_id, res.version,
531 res.state.get('lease_expiration_ts')) 539 res.state.get('lease_expiration_ts'))
532 if not request: 540 if not request:
533 # No task found, tell it to sleep a bit. 541 # No task found, tell it to sleep a bit.
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
617 } 625 }
618 self.send_response(out) 626 self.send_response(out)
619 627
620 def _cmd_update(self, expected_version): 628 def _cmd_update(self, expected_version):
621 out = { 629 out = {
622 'cmd': 'update', 630 'cmd': 'update',
623 'version': expected_version, 631 'version': expected_version,
624 } 632 }
625 self.send_response(out) 633 self.send_response(out)
626 634
627 def _cmd_restart(self, message): 635 def _cmd_host_reboot(self, message):
628 logging.info('Rebooting bot: %s', message) 636 logging.info('Rebooting host: %s', message)
629 out = { 637 out = {
630 'cmd': 'restart', 638 'cmd': 'host_reboot',
631 'message': message, 639 'message': message,
632 } 640 }
633 self.send_response(out) 641 self.send_response(out)
642
643 def _cmd_bot_restart(self, message):
644 logging.info('Restarting bot: %s', message)
645 out = {
646 'cmd': 'bot_restart',
647 'message': message,
648 }
649 self.send_response(out)
634 650
635 651
636 class BotEventHandler(_BotBaseHandler): 652 class BotEventHandler(_BotBaseHandler):
637 """On signal that a bot had an event worth logging.""" 653 """On signal that a bot had an event worth logging."""
638 654
639 EXPECTED_KEYS = _BotBaseHandler.EXPECTED_KEYS | {u'event', u'message'} 655 EXPECTED_KEYS = _BotBaseHandler.EXPECTED_KEYS | {u'event', u'message'}
640 656
641 ALLOWED_EVENTS = ('bot_error', 'bot_log', 'bot_rebooting', 'bot_shutdown') 657 ALLOWED_EVENTS = ('bot_error', 'bot_log', 'bot_rebooting', 'bot_shutdown')
642 658
643 @auth.public # auth happens in self._process() 659 @auth.public # auth happens in self._process()
(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after
908 ('/swarming/api/v1/bot/poll', BotPollHandler), 924 ('/swarming/api/v1/bot/poll', BotPollHandler),
909 ('/swarming/api/v1/bot/server_ping', ServerPingHandler), 925 ('/swarming/api/v1/bot/server_ping', ServerPingHandler),
910 ('/swarming/api/v1/bot/task_update', BotTaskUpdateHandler), 926 ('/swarming/api/v1/bot/task_update', BotTaskUpdateHandler),
911 ('/swarming/api/v1/bot/task_update/<task_id:[a-f0-9]+>', 927 ('/swarming/api/v1/bot/task_update/<task_id:[a-f0-9]+>',
912 BotTaskUpdateHandler), 928 BotTaskUpdateHandler),
913 ('/swarming/api/v1/bot/task_error', BotTaskErrorHandler), 929 ('/swarming/api/v1/bot/task_error', BotTaskErrorHandler),
914 ('/swarming/api/v1/bot/task_error/<task_id:[a-f0-9]+>', 930 ('/swarming/api/v1/bot/task_error/<task_id:[a-f0-9]+>',
915 BotTaskErrorHandler), 931 BotTaskErrorHandler),
916 ] 932 ]
917 return [webapp2.Route(*i) for i in routes] 933 return [webapp2.Route(*i) for i in routes]
OLDNEW
« no previous file with comments | « no previous file | appengine/swarming/handlers_bot_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698