| OLD | NEW | 
|---|
| 1 # Copyright 2013 The LUCI Authors. All rights reserved. | 1 # Copyright 2013 The LUCI Authors. All rights reserved. | 
| 2 # Use of this source code is governed under the Apache License, Version 2.0 | 2 # Use of this source code is governed under the Apache License, Version 2.0 | 
| 3 # that can be found in the LICENSE file. | 3 # that can be found in the LICENSE file. | 
| 4 | 4 | 
| 5 """Swarming bot main process. | 5 """Swarming bot main process. | 
| 6 | 6 | 
| 7 This is the program that communicates with the Swarming server, ensures the code | 7 This is the program that communicates with the Swarming server, ensures the code | 
| 8 is always up to date and executes a child process to run tasks and upload | 8 is always up to date and executes a child process to run tasks and upload | 
| 9 results back. | 9 results back. | 
| 10 | 10 | 
| 11 It manages self-update and rebooting the host in case of problems. | 11 It manages self-update and rebooting the host in case of problems. | 
| 12 | 12 | 
| 13 Set the environment variable SWARMING_LOAD_TEST=1 to disable the use of | 13 Set the environment variable SWARMING_LOAD_TEST=1 to disable the use of | 
| 14 server-provided bot_config.py. This permits safe load testing. | 14 server-provided bot_config.py. This permits safe load testing. | 
| 15 """ | 15 """ | 
| 16 | 16 | 
| 17 import contextlib | 17 import contextlib | 
| 18 import json | 18 import json | 
| 19 import logging | 19 import logging | 
| 20 import optparse | 20 import optparse | 
| 21 import os | 21 import os | 
| 22 import shutil | 22 import shutil | 
| 23 import signal |  | 
| 24 import sys | 23 import sys | 
| 25 import tempfile | 24 import tempfile | 
| 26 import threading | 25 import threading | 
| 27 import time | 26 import time | 
| 28 import traceback | 27 import traceback | 
| 29 import zipfile | 28 import zipfile | 
| 30 | 29 | 
| 31 import common | 30 import common | 
|  | 31 import file_refresher | 
|  | 32 import remote_client | 
| 32 import singleton | 33 import singleton | 
| 33 from api import bot | 34 from api import bot | 
| 34 from api import os_utilities | 35 from api import os_utilities | 
| 35 from utils import file_path | 36 from utils import file_path | 
| 36 from utils import net | 37 from utils import net | 
| 37 from utils import on_error | 38 from utils import on_error | 
| 38 from utils import subprocess42 | 39 from utils import subprocess42 | 
| 39 from utils import zip_package | 40 from utils import zip_package | 
| 40 | 41 | 
| 41 | 42 | 
| (...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 163     should_continue = bot_config.setup_bot(botobj) | 164     should_continue = bot_config.setup_bot(botobj) | 
| 164   except Exception as e: | 165   except Exception as e: | 
| 165     msg = '%s\n%s' % (e, traceback.format_exc()[-2048:]) | 166     msg = '%s\n%s' % (e, traceback.format_exc()[-2048:]) | 
| 166     botobj.post_error('bot_config.setup_bot() threw: %s' % msg) | 167     botobj.post_error('bot_config.setup_bot() threw: %s' % msg) | 
| 167     return | 168     return | 
| 168 | 169 | 
| 169   if not should_continue and not skip_reboot: | 170   if not should_continue and not skip_reboot: | 
| 170     botobj.restart('Starting new swarming bot: %s' % THIS_FILE) | 171     botobj.restart('Starting new swarming bot: %s' % THIS_FILE) | 
| 171 | 172 | 
| 172 | 173 | 
|  | 174 def get_authentication_headers(botobj): | 
|  | 175   """Calls bot_config.get_authentication_headers() if it is defined. | 
|  | 176 | 
|  | 177   Doesn't catch exceptions. | 
|  | 178   """ | 
|  | 179   if _in_load_test_mode(): | 
|  | 180     return (None, None) | 
|  | 181   logging.info('get_authentication_headers()') | 
|  | 182   from config import bot_config | 
|  | 183   func = getattr(bot_config, 'get_authentication_headers', None) | 
|  | 184   return func(botobj) if func else (None, None) | 
|  | 185 | 
|  | 186 | 
| 173 ### end of bot_config handler part. | 187 ### end of bot_config handler part. | 
| 174 | 188 | 
| 175 | 189 | 
| 176 def get_min_free_space(): | 190 def get_min_free_space(): | 
| 177   """Returns free disk space needed. | 191   """Returns free disk space needed. | 
| 178 | 192 | 
| 179   Add a "250 MiB slack space" for logs, temporary files and whatever other leak. | 193   Add a "250 MiB slack space" for logs, temporary files and whatever other leak. | 
| 180   """ | 194   """ | 
| 181   return int((os_utilities.get_min_free_space(THIS_FILE) + 250.) * 1024 * 1024) | 195   return int((os_utilities.get_min_free_space(THIS_FILE) + 250.) * 1024 * 1024) | 
| 182 | 196 | 
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 221         exception handler for incoming commands from the Swarming server. If for | 235         exception handler for incoming commands from the Swarming server. If for | 
| 222         any reason the local test runner script can not be run successfully, | 236         any reason the local test runner script can not be run successfully, | 
| 223         this function is invoked. | 237         this function is invoked. | 
| 224   """ | 238   """ | 
| 225   logging.error('Error: %s', error) | 239   logging.error('Error: %s', error) | 
| 226   data = { | 240   data = { | 
| 227     'id': botobj.id, | 241     'id': botobj.id, | 
| 228     'message': error, | 242     'message': error, | 
| 229     'task_id': task_id, | 243     'task_id': task_id, | 
| 230   } | 244   } | 
| 231   return net.url_read_json( | 245   return botobj.remote.url_read_json( | 
| 232       botobj.server + '/swarming/api/v1/bot/task_error/%s' % task_id, data=data) | 246       '/swarming/api/v1/bot/task_error/%s' % task_id, data=data) | 
| 233 | 247 | 
| 234 | 248 | 
| 235 def on_shutdown_hook(b): | 249 def on_shutdown_hook(b): | 
| 236   """Called when the bot is restarting.""" | 250   """Called when the bot is restarting.""" | 
| 237   call_hook(b, 'on_bot_shutdown') | 251   call_hook(b, 'on_bot_shutdown') | 
| 238   # Aggressively set itself up so we ensure the auto-reboot configuration is | 252   # Aggressively set itself up so we ensure the auto-reboot configuration is | 
| 239   # fine before restarting the host. This is important as some tasks delete the | 253   # fine before restarting the host. This is important as some tasks delete the | 
| 240   # autorestart script (!) | 254   # autorestart script (!) | 
| 241   setup_bot(True) | 255   setup_bot(True) | 
| 242 | 256 | 
| 243 | 257 | 
| 244 def get_bot(): | 258 def get_bot(): | 
| 245   """Returns a valid Bot instance. | 259   """Returns a valid Bot instance. | 
| 246 | 260 | 
| 247   Should only be called once in the process lifetime. | 261   Should only be called once in the process lifetime. | 
| 248   """ | 262   """ | 
| 249   # This variable is used to bootstrap the initial bot.Bot object, which then is | 263   # This variable is used to bootstrap the initial bot.Bot object, which then is | 
| 250   # used to get the dimensions and state. | 264   # used to get the dimensions and state. | 
| 251   attributes = { | 265   attributes = { | 
| 252     'dimensions': {u'id': ['none']}, | 266     'dimensions': {u'id': ['none']}, | 
| 253     'state': {}, | 267     'state': {}, | 
| 254     'version': generate_version(), | 268     'version': generate_version(), | 
| 255   } | 269   } | 
| 256   config = get_config() | 270   config = get_config() | 
| 257   assert not config['server'].endswith('/'), config | 271   assert not config['server'].endswith('/'), config | 
| 258 | 272 | 
| 259   # Create a temporary object to call the hooks. | 273   # Use temporary Bot object to call get_attributes. Attributes are needed to | 
|  | 274   # construct the "real" bot.Bot. | 
|  | 275   attributes = get_attributes( | 
|  | 276     bot.Bot( | 
|  | 277       remote_client.RemoteClient(config['server'], None), | 
|  | 278       attributes, | 
|  | 279       config['server'], | 
|  | 280       config['server_version'], | 
|  | 281       os.path.dirname(THIS_FILE), | 
|  | 282       on_shutdown_hook)) | 
|  | 283 | 
|  | 284   # Make remote client callback use the returned bot object. We assume here | 
|  | 285   # RemoteClient doesn't call its callback in the constructor (since 'botobj' is | 
|  | 286   # undefined during the construction). | 
| 260   botobj = bot.Bot( | 287   botobj = bot.Bot( | 
|  | 288       remote_client.RemoteClient( | 
|  | 289           config['server'], | 
|  | 290           lambda: get_authentication_headers(botobj)), | 
| 261       attributes, | 291       attributes, | 
| 262       config['server'], | 292       config['server'], | 
| 263       config['server_version'], | 293       config['server_version'], | 
| 264       os.path.dirname(THIS_FILE), | 294       os.path.dirname(THIS_FILE), | 
| 265       on_shutdown_hook) | 295       on_shutdown_hook) | 
| 266   return bot.Bot( | 296   return botobj | 
| 267       get_attributes(botobj), |  | 
| 268       config['server'], |  | 
| 269       config['server_version'], |  | 
| 270       os.path.dirname(THIS_FILE), |  | 
| 271       on_shutdown_hook) |  | 
| 272 | 297 | 
| 273 | 298 | 
| 274 def clean_isolated_cache(botobj): | 299 def clean_isolated_cache(botobj): | 
| 275   """Asks run_isolated to clean its cache. | 300   """Asks run_isolated to clean its cache. | 
| 276 | 301 | 
| 277   This may take a while but it ensures that in the case of a run_isolated run | 302   This may take a while but it ensures that in the case of a run_isolated run | 
| 278   failed and it temporarily used more space than min_free_disk, it can cleans up | 303   failed and it temporarily used more space than min_free_disk, it can cleans up | 
| 279   the mess properly. | 304   the mess properly. | 
| 280 | 305 | 
| 281   It will remove unexpected files, remove corrupted files, trim the cache size | 306   It will remove unexpected files, remove corrupted files, trim the cache size | 
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 328       # First thing is to get an arbitrary url. This also ensures the network is | 353       # First thing is to get an arbitrary url. This also ensures the network is | 
| 329       # up and running, which is necessary before trying to get the FQDN below. | 354       # up and running, which is necessary before trying to get the FQDN below. | 
| 330       resp = net.url_read(config['server'] + '/swarming/api/v1/bot/server_ping') | 355       resp = net.url_read(config['server'] + '/swarming/api/v1/bot/server_ping') | 
| 331       if resp is None: | 356       if resp is None: | 
| 332         logging.error('No response from server_ping') | 357         logging.error('No response from server_ping') | 
| 333     except Exception as e: | 358     except Exception as e: | 
| 334       # url_read() already traps pretty much every exceptions. This except | 359       # url_read() already traps pretty much every exceptions. This except | 
| 335       # clause is kept there "just in case". | 360       # clause is kept there "just in case". | 
| 336       logging.exception('server_ping threw') | 361       logging.exception('server_ping threw') | 
| 337 | 362 | 
|  | 363     # Next we make sure the bot can make authenticated calls by grabbing | 
|  | 364     # the auth headers, retrying on errors a bunch of times. We don't give up | 
|  | 365     # if it fails though (maybe the bot will "fix itself" later). | 
|  | 366     botobj = get_bot() | 
|  | 367     try: | 
|  | 368       botobj.remote.initialize(quit_bit) | 
|  | 369     except remote_client.InitializationError as exc: | 
|  | 370       botobj.post_error('failed to grab auth headers: %s' % exc.last_error) | 
|  | 371       logging.error('Can\'t grab auth headers, continuing anyway...') | 
|  | 372 | 
| 338     if quit_bit.is_set(): | 373     if quit_bit.is_set(): | 
| 339       logging.info('Early quit 1') | 374       logging.info('Early quit 1') | 
| 340       return 0 | 375       return 0 | 
| 341 | 376 | 
| 342     # If this fails, there's hardly anything that can be done, the bot can't | 377     # If this fails, there's hardly anything that can be done, the bot can't | 
| 343     # even get to the point to be able to self-update. | 378     # even get to the point to be able to self-update. | 
| 344     botobj = get_bot() | 379     resp = botobj.remote.url_read_json( | 
| 345     resp = net.url_read_json( | 380         '/swarming/api/v1/bot/handshake', data=botobj._attributes) | 
| 346         botobj.server + '/swarming/api/v1/bot/handshake', |  | 
| 347         data=botobj._attributes) |  | 
| 348     if not resp: | 381     if not resp: | 
| 349       logging.error('Failed to contact for handshake') | 382       logging.error('Failed to contact for handshake') | 
| 350     else: | 383     else: | 
| 351       logging.info('Connected to %s', resp.get('server_version')) | 384       logging.info('Connected to %s', resp.get('server_version')) | 
| 352       if resp.get('bot_version') != botobj._attributes['version']: | 385       if resp.get('bot_version') != botobj._attributes['version']: | 
| 353         logging.warning( | 386         logging.warning( | 
| 354             'Found out we\'ll need to update: server said %s; we\'re %s', | 387             'Found out we\'ll need to update: server said %s; we\'re %s', | 
| 355             resp.get('bot_version'), botobj._attributes['version']) | 388             resp.get('bot_version'), botobj._attributes['version']) | 
| 356 | 389 | 
| 357     if arg_error: | 390     if arg_error: | 
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 404   return 0 | 437   return 0 | 
| 405 | 438 | 
| 406 | 439 | 
| 407 def poll_server(botobj, quit_bit): | 440 def poll_server(botobj, quit_bit): | 
| 408   """Polls the server to run one loop. | 441   """Polls the server to run one loop. | 
| 409 | 442 | 
| 410   Returns True if executed some action, False if server asked the bot to sleep. | 443   Returns True if executed some action, False if server asked the bot to sleep. | 
| 411   """ | 444   """ | 
| 412   # Access to a protected member _XXX of a client class - pylint: disable=W0212 | 445   # Access to a protected member _XXX of a client class - pylint: disable=W0212 | 
| 413   start = time.time() | 446   start = time.time() | 
| 414   resp = net.url_read_json( | 447   resp = botobj.remote.url_read_json( | 
| 415      botobj.server + '/swarming/api/v1/bot/poll', data=botobj._attributes) | 448       '/swarming/api/v1/bot/poll', data=botobj._attributes) | 
| 416   if not resp: | 449   if not resp: | 
| 417     return False | 450     return False | 
| 418   logging.debug('Server response:\n%s', resp) | 451   logging.debug('Server response:\n%s', resp) | 
| 419 | 452 | 
| 420   cmd = resp['cmd'] | 453   cmd = resp['cmd'] | 
| 421   if cmd == 'sleep': | 454   if cmd == 'sleep': | 
| 422     quit_bit.wait(resp['duration']) | 455     quit_bit.wait(resp['duration']) | 
| 423     return False | 456     return False | 
| 424 | 457 | 
| 425   if cmd == 'terminate': | 458   if cmd == 'terminate': | 
| 426     quit_bit.set() | 459     quit_bit.set() | 
| 427     # This is similar to post_update() in task_runner.py. | 460     # This is similar to post_update() in task_runner.py. | 
| 428     params = { | 461     params = { | 
| 429       'cost_usd': 0, | 462       'cost_usd': 0, | 
| 430       'duration': 0, | 463       'duration': 0, | 
| 431       'exit_code': 0, | 464       'exit_code': 0, | 
| 432       'hard_timeout': False, | 465       'hard_timeout': False, | 
| 433       'id': botobj.id, | 466       'id': botobj.id, | 
| 434       'io_timeout': False, | 467       'io_timeout': False, | 
| 435       'output': '', | 468       'output': '', | 
| 436       'output_chunk_start': 0, | 469       'output_chunk_start': 0, | 
| 437       'task_id': resp['task_id'], | 470       'task_id': resp['task_id'], | 
| 438     } | 471     } | 
| 439     net.url_read_json( | 472     botobj.remote.url_read_json( | 
| 440         botobj.server + '/swarming/api/v1/bot/task_update/%s' % resp['task_id'], | 473         '/swarming/api/v1/bot/task_update/%s' % resp['task_id'], | 
| 441         data=params) | 474         data=params) | 
| 442     return False | 475     return False | 
| 443 | 476 | 
| 444   if cmd == 'run': | 477   if cmd == 'run': | 
| 445     if run_manifest(botobj, resp['manifest'], start): | 478     if run_manifest(botobj, resp['manifest'], start): | 
| 446       # Completed a task successfully so update swarming_bot.zip if necessary. | 479       # Completed a task successfully so update swarming_bot.zip if necessary. | 
| 447       update_lkgbc(botobj) | 480       update_lkgbc(botobj) | 
| 448     # Clean up cache after a task | 481     # Clean up cache after a task | 
| 449     clean_isolated_cache(botobj) | 482     clean_isolated_cache(botobj) | 
| 450     # TODO(maruel): Handle the case where quit_bit.is_set() happens here. This | 483     # TODO(maruel): Handle the case where quit_bit.is_set() happens here. This | 
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 489     if not manifest['command']: | 522     if not manifest['command']: | 
| 490       hard_timeout += manifest['io_timeout'] or 600 | 523       hard_timeout += manifest['io_timeout'] or 600 | 
| 491 | 524 | 
| 492   url = manifest.get('host', botobj.server) | 525   url = manifest.get('host', botobj.server) | 
| 493   task_dimensions = manifest['dimensions'] | 526   task_dimensions = manifest['dimensions'] | 
| 494   task_result = {} | 527   task_result = {} | 
| 495 | 528 | 
| 496   failure = False | 529   failure = False | 
| 497   internal_failure = False | 530   internal_failure = False | 
| 498   msg = None | 531   msg = None | 
|  | 532   headers_dumper = None | 
| 499   work_dir = os.path.join(botobj.base_dir, 'work') | 533   work_dir = os.path.join(botobj.base_dir, 'work') | 
| 500   try: | 534   try: | 
| 501     try: | 535     try: | 
| 502       if os.path.isdir(work_dir): | 536       if os.path.isdir(work_dir): | 
| 503         file_path.rmtree(work_dir) | 537         file_path.rmtree(work_dir) | 
| 504     except OSError: | 538     except OSError: | 
| 505       # If a previous task created an undeleteable file/directory inside 'work', | 539       # If a previous task created an undeleteable file/directory inside 'work', | 
| 506       # make sure that following tasks are not affected. This is done by working | 540       # make sure that following tasks are not affected. This is done by working | 
| 507       # around the undeleteable directory by creating a temporary directory | 541       # around the undeleteable directory by creating a temporary directory | 
| 508       # instead. This is not normal behavior. The bot will report a failure on | 542       # instead. This is not normal behavior. The bot will report a failure on | 
| 509       # start. | 543       # start. | 
| 510       work_dir = tempfile.mkdtemp(dir=botobj.base_dir, prefix='work') | 544       work_dir = tempfile.mkdtemp(dir=botobj.base_dir, prefix='work') | 
| 511     else: | 545     else: | 
| 512       os.makedirs(work_dir) | 546       os.makedirs(work_dir) | 
| 513 | 547 | 
| 514     env = os.environ.copy() | 548     env = os.environ.copy() | 
| 515     # Windows in particular does not tolerate unicode strings in environment | 549     # Windows in particular does not tolerate unicode strings in environment | 
| 516     # variables. | 550     # variables. | 
| 517     env['SWARMING_TASK_ID'] = task_id.encode('ascii') | 551     env['SWARMING_TASK_ID'] = task_id.encode('ascii') | 
| 518 | 552 | 
| 519     task_in_file = os.path.join(work_dir, 'task_runner_in.json') | 553     task_in_file = os.path.join(work_dir, 'task_runner_in.json') | 
| 520     with open(task_in_file, 'wb') as f: | 554     with open(task_in_file, 'wb') as f: | 
| 521       f.write(json.dumps(manifest)) | 555       f.write(json.dumps(manifest)) | 
| 522     call_hook(botobj, 'on_before_task') | 556     call_hook(botobj, 'on_before_task') | 
| 523     task_result_file = os.path.join(work_dir, 'task_runner_out.json') | 557     task_result_file = os.path.join(work_dir, 'task_runner_out.json') | 
| 524     if os.path.exists(task_result_file): | 558     if os.path.exists(task_result_file): | 
| 525       os.remove(task_result_file) | 559       os.remove(task_result_file) | 
|  | 560 | 
|  | 561     # Start a thread that periodically puts authentication headers to a file on | 
|  | 562     # disk. task_runner reads them from there before making HTTP calls to the | 
|  | 563     # swarming server. | 
|  | 564     headers_file = os.path.join(work_dir, 'bot_auth_headers.json') | 
|  | 565     if botobj.remote.uses_auth: | 
|  | 566       headers_dumper = file_refresher.FileRefresherThread( | 
|  | 567           headers_file, botobj.remote.get_authentication_headers) | 
|  | 568       headers_dumper.start() | 
|  | 569 | 
| 526     command = [ | 570     command = [ | 
| 527       sys.executable, THIS_FILE, 'task_runner', | 571       sys.executable, THIS_FILE, 'task_runner', | 
| 528       '--swarming-server', url, | 572       '--swarming-server', url, | 
| 529       '--in-file', task_in_file, | 573       '--in-file', task_in_file, | 
| 530       '--out-file', task_result_file, | 574       '--out-file', task_result_file, | 
| 531       '--cost-usd-hour', str(botobj.state.get('cost_usd_hour') or 0.), | 575       '--cost-usd-hour', str(botobj.state.get('cost_usd_hour') or 0.), | 
| 532       # Include the time taken to poll the task in the cost. | 576       # Include the time taken to poll the task in the cost. | 
| 533       '--start', str(start), | 577       '--start', str(start), | 
| 534       '--min-free-space', str(get_min_free_space()), | 578       '--min-free-space', str(get_min_free_space()), | 
| 535     ] | 579     ] | 
|  | 580     if headers_dumper: | 
|  | 581       command.extend(['--auth-headers-file', headers_file]) | 
| 536     logging.debug('Running command: %s', command) | 582     logging.debug('Running command: %s', command) | 
|  | 583 | 
| 537     # Put the output file into the current working directory, which should be | 584     # Put the output file into the current working directory, which should be | 
| 538     # the one containing swarming_bot.zip. | 585     # the one containing swarming_bot.zip. | 
| 539     log_path = os.path.join(botobj.base_dir, 'logs', 'task_runner_stdout.log') | 586     log_path = os.path.join(botobj.base_dir, 'logs', 'task_runner_stdout.log') | 
| 540     os_utilities.roll_log(log_path) | 587     os_utilities.roll_log(log_path) | 
| 541     os_utilities.trim_rolled_log(log_path) | 588     os_utilities.trim_rolled_log(log_path) | 
| 542     with open(log_path, 'a+b') as f: | 589     with open(log_path, 'a+b') as f: | 
| 543       proc = subprocess42.Popen( | 590       proc = subprocess42.Popen( | 
| 544           command, | 591           command, | 
| 545           detached=True, | 592           detached=True, | 
| 546           cwd=botobj.base_dir, | 593           cwd=botobj.base_dir, | 
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 587     failure = bool(task_result.get('exit_code')) if task_result else False | 634     failure = bool(task_result.get('exit_code')) if task_result else False | 
| 588     return not internal_failure and not failure | 635     return not internal_failure and not failure | 
| 589   except Exception as e: | 636   except Exception as e: | 
| 590     # Failures include IOError when writing if the disk is full, OSError if | 637     # Failures include IOError when writing if the disk is full, OSError if | 
| 591     # swarming_bot.zip doesn't exist anymore, etc. | 638     # swarming_bot.zip doesn't exist anymore, etc. | 
| 592     logging.exception('run_manifest failed') | 639     logging.exception('run_manifest failed') | 
| 593     msg = 'Internal exception occured: %s\n%s' % ( | 640     msg = 'Internal exception occured: %s\n%s' % ( | 
| 594         e, traceback.format_exc()[-2048:]) | 641         e, traceback.format_exc()[-2048:]) | 
| 595     internal_failure = True | 642     internal_failure = True | 
| 596   finally: | 643   finally: | 
|  | 644     if headers_dumper: | 
|  | 645       headers_dumper.stop() | 
| 597     if internal_failure: | 646     if internal_failure: | 
| 598       post_error_task(botobj, msg, task_id) | 647       post_error_task(botobj, msg, task_id) | 
| 599     call_hook( | 648     call_hook( | 
| 600         botobj, 'on_after_task', failure, internal_failure, task_dimensions, | 649         botobj, 'on_after_task', failure, internal_failure, task_dimensions, | 
| 601         task_result) | 650         task_result) | 
| 602     if os.path.isdir(work_dir): | 651     if os.path.isdir(work_dir): | 
| 603       try: | 652       try: | 
| 604         file_path.rmtree(work_dir) | 653         file_path.rmtree(work_dir) | 
| 605       except Exception as e: | 654       except Exception as e: | 
| 606         botobj.post_error( | 655         botobj.post_error( | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
| 617 | 666 | 
| 618   Does not return. | 667   Does not return. | 
| 619   """ | 668   """ | 
| 620   # Alternate between .1.zip and .2.zip. | 669   # Alternate between .1.zip and .2.zip. | 
| 621   new_zip = 'swarming_bot.1.zip' | 670   new_zip = 'swarming_bot.1.zip' | 
| 622   if os.path.basename(THIS_FILE) == new_zip: | 671   if os.path.basename(THIS_FILE) == new_zip: | 
| 623     new_zip = 'swarming_bot.2.zip' | 672     new_zip = 'swarming_bot.2.zip' | 
| 624   new_zip = os.path.join(os.path.dirname(THIS_FILE), new_zip) | 673   new_zip = os.path.join(os.path.dirname(THIS_FILE), new_zip) | 
| 625 | 674 | 
| 626   # Download as a new file. | 675   # Download as a new file. | 
| 627   url = botobj.server + '/swarming/api/v1/bot/bot_code/%s' % version | 676   url_path = '/swarming/api/v1/bot/bot_code/%s' % version | 
| 628   if not net.url_retrieve(new_zip, url): | 677   if not botobj.remote.url_retrieve(new_zip, url_path): | 
| 629     # It can happen when a server is rapidly updated multiple times in a row. | 678     # It can happen when a server is rapidly updated multiple times in a row. | 
| 630     botobj.post_error( | 679     botobj.post_error( | 
| 631         'Unable to download %s from %s; first tried version %s' % | 680         'Unable to download %s from %s; first tried version %s' % | 
| 632         (new_zip, url, version)) | 681         (new_zip, botobj.server + url_path, version)) | 
| 633     # Poll again, this may work next time. To prevent busy-loop, sleep a little. | 682     # Poll again, this may work next time. To prevent busy-loop, sleep a little. | 
| 634     time.sleep(2) | 683     time.sleep(2) | 
| 635     return | 684     return | 
| 636 | 685 | 
| 637   s = os.stat(new_zip) | 686   s = os.stat(new_zip) | 
| 638   logging.info('Restarting to %s; %d bytes.', new_zip, s.st_size) | 687   logging.info('Restarting to %s; %d bytes.', new_zip, s.st_size) | 
| 639   sys.stdout.flush() | 688   sys.stdout.flush() | 
| 640   sys.stderr.flush() | 689   sys.stderr.flush() | 
| 641 | 690 | 
| 642   proc = subprocess42.Popen( | 691   proc = subprocess42.Popen( | 
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 737         os.path.dirname(THIS_FILE), 'logs', 'bot_std%s.log' % t) | 786         os.path.dirname(THIS_FILE), 'logs', 'bot_std%s.log' % t) | 
| 738     os_utilities.roll_log(log_path) | 787     os_utilities.roll_log(log_path) | 
| 739     os_utilities.trim_rolled_log(log_path) | 788     os_utilities.trim_rolled_log(log_path) | 
| 740 | 789 | 
| 741   error = None | 790   error = None | 
| 742   if len(args) != 0: | 791   if len(args) != 0: | 
| 743     error = 'Unexpected arguments: %s' % args | 792     error = 'Unexpected arguments: %s' % args | 
| 744   try: | 793   try: | 
| 745     return run_bot(error) | 794     return run_bot(error) | 
| 746   finally: | 795   finally: | 
| 747     call_hook(bot.Bot(None, None, None, os.path.dirname(THIS_FILE), None), | 796     call_hook(bot.Bot(None, None, None, None, os.path.dirname(THIS_FILE), None), | 
| 748               'on_bot_shutdown') | 797               'on_bot_shutdown') | 
| 749     logging.info('main() returning') | 798     logging.info('main() returning') | 
| OLD | NEW | 
|---|