| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # | 2 # |
| 3 # Copyright (c) 2010 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
| 6 | 6 |
| 7 """Wrapper that does auto-retry and stats logging for command invocation. | 7 """Wrapper that does auto-retry and stats logging for command invocation. |
| 8 | 8 |
| 9 Various command line tools in use (gsutil, curl) have spurious failures. | 9 Various command line tools in use (gsutil, curl) have spurious failures. |
| 10 This wrapper will track stats to an AppEngine based service to | 10 This wrapper will track stats to an AppEngine based service to |
| 11 help track down the cause of failures, as well as add retry logic. | 11 help track down the cause of failures, as well as add retry logic. |
| 12 """ | 12 """ |
| 13 | 13 |
| 14 | 14 |
| 15 import optparse | 15 import optparse |
| 16 import os | 16 import os |
| 17 import platform | 17 import platform |
| 18 import socket |
| 18 import subprocess | 19 import subprocess |
| 19 import sys | 20 import sys |
| 21 import threading |
| 20 import time | 22 import time |
| 21 import urllib | 23 import urllib |
| 22 import uuid | 24 import uuid |
| 23 | 25 |
| 24 | 26 |
| 27 LOG_TIMEOUT = 10 |
| 28 |
| 29 |
| 25 def LogCommand(options, command_id, | 30 def LogCommand(options, command_id, |
| 26 attempt, cmd, returncode, stdout, stderr, runtime): | 31 attempt, cmd, returncode, stdout, stderr, runtime): |
| 27 """Log a command invocation and result to a central location. | 32 """Log a command invocation and result to a central location. |
| 28 | 33 |
| 29 Arguments: | 34 Arguments: |
| 30 options: parsed options | 35 options: parsed options |
| 31 command_id: unique id for this command (shared by all retries) | 36 command_id: unique id for this command (shared by all retries) |
| 32 attempt: which try numbered from 0 | 37 attempt: which try numbered from 0 |
| 33 cmd: command run | 38 cmd: command run |
| 34 returncode: return code from running command | 39 returncode: return code from running command |
| (...skipping 18 matching lines...) Expand all Loading... |
| 53 'uname_sysname': uname[0], | 58 'uname_sysname': uname[0], |
| 54 'uname_nodename': uname[1], | 59 'uname_nodename': uname[1], |
| 55 'uname_release': uname[2], | 60 'uname_release': uname[2], |
| 56 'uname_version': uname[3], | 61 'uname_version': uname[3], |
| 57 'uname_machine': uname[4], | 62 'uname_machine': uname[4], |
| 58 'uname_processor': uname[5], | 63 'uname_processor': uname[5], |
| 59 }) | 64 }) |
| 60 f = urllib.urlopen(options.logurl, params) | 65 f = urllib.urlopen(options.logurl, params) |
| 61 ret = f.read() | 66 ret = f.read() |
| 62 f.close() | 67 f.close() |
| 63 return int(ret) != 0 | 68 try: |
| 69 return int(ret) != 0 |
| 70 except ValueError: |
| 71 return 0 |
| 72 |
| 73 |
def RunWithTimeout(timeout, func, *args, **kwargs):
  """Call func(*args, **kwargs) on a helper thread, waiting a bounded time.

  Arguments:
    timeout: maximum number of seconds to wait for func to finish
    func: callable to invoke
    *args: positional arguments forwarded to func
    **kwargs: keyword arguments forwarded to func
  Returns:
    Whatever func returned, or None if func did not finish within timeout
    (or raised, in which case the exception stays on the helper thread).
  """
  # A plain assignment inside the nested function would only bind a new
  # local there, leaving the outer value untouched. A mutable holder lets
  # the helper thread hand its result back on both Python 2 and Python 3.
  result = [None]

  def CallFunc():
    result[0] = func(*args, **kwargs)

  th = threading.Thread(target=CallFunc)
  th.start()
  # NOTE: the thread is not a daemon, so a call that outlives the timeout
  # keeps running in the background after this function returns.
  th.join(timeout)
  return result[0]
| 64 | 82 |
| 65 | 83 |
def main(argv):
  """Run a command with automatic retries, logging every attempt.

  Arguments:
    argv: full argument vector including the program name at argv[0];
          wrapper options are parsed out and the remaining arguments are
          joined with spaces to form the command line to run.
  Returns:
    The return code of the last attempt of the wrapped command, or 1 if
    no attempt was made because the retry count was not positive.
  """
  parser = optparse.OptionParser()
  parser.add_option('-r', '--retries', dest='retries',
                    type='int', default=10,
                    help='number of times to retry on failure')
  parser.add_option('-u', '--logurl', dest='logurl',
                    default='https://command-wrapper.appspot.com/log',
                    help='URL to log invocations/failures to')
  (options, args) = parser.parse_args(args=argv[1:])

  # Limit tcp connection timeouts to 10 seconds.
  socket.setdefaulttimeout(10)

  command_id = uuid.uuid1()
  # NOTE: the arguments are joined with spaces and handed to the shell, so
  # any quoting present in the original argument list is not preserved.
  cmd = ' '.join(args)

  # Log that we're even starting.
  RunWithTimeout(LOG_TIMEOUT, LogCommand,
                 options, command_id, -1, cmd, -1, '', '', 0)

  # Reported when the loop below never runs (retries <= 0); without this
  # the final return would reference a process that was never started.
  returncode = 1

  # Try up to a certain number of times.
  for r in range(options.retries):
    tm = time.time()
    p = subprocess.Popen(cmd, shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    (p_stdout, p_stderr) = p.communicate()
    # Relay the captured output so the wrapper is transparent to callers.
    sys.stdout.write(p_stdout)
    sys.stderr.write(p_stderr)
    runtime = time.time() - tm
    returncode = p.returncode
    accept = RunWithTimeout(LOG_TIMEOUT, LogCommand,
                            options, command_id, r, cmd,
                            returncode, p_stdout, p_stderr, runtime)
    # The log service may tell us to accept the result as final.
    if accept:
      return returncode
    if returncode == 0:
      return 0
    print('Command %s failed with retcode %d, try %d.' % (
        cmd, returncode, r + 1))
  print('Command %s failed %d retries, giving up.' % (
      cmd, options.retries))

  return returncode
| 99 | 127 |
| 100 | 128 |
| 101 if __name__ == '__main__': | 129 if __name__ == '__main__': |
| 102 sys.exit(main(sys.argv)) | 130 sys.exit(main(sys.argv)) |
| OLD | NEW |