| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # | |
| 3 # Copyright 2015 The Chromium Authors. All rights reserved. | 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 6 | 5 |
| 7 """Waits for any one job out of a list to complete or a default timeout.""" | 6 """Waits for any one job out of a list to complete or a default timeout.""" |
| 8 | 7 |
| 9 import json | 8 import json |
| 9 import os |
| 10 import subprocess | 10 import subprocess |
| 11 import sys | 11 import sys |
| 12 import time | 12 import time |
| 13 import urllib2 | |
| 14 | 13 |
| 15 import check_buildbot | 14 import check_buildbot |
| 16 | 15 |
| 16 # Return codes. |
| 17 COMPLETED, FAILED, TIMED_OUT, BAD_ARGS = 0, 1, 2, 3 |
| 17 | 18 |
| 18 # The following intervals are specified in seconds, are expected to be sent as | 19 # The following intervals are specified in seconds, are expected to be sent as |
| 19 # arguments to time.sleep() | 20 # arguments to time.sleep() |
| 20 # All URLs are checked in sequence separated by 'short' interval seconds, to | 21 |
| 21 # prevent possibly getting throttled by whatever endpoint gsutil or urllib are | |
| 22 # hitting. | |
| 23 SHORT_INTERVAL = 0.4 | |
| 24 # If none of the URLs is determined to be ready, we sleep for a 'long' | 22 # If none of the URLs is determined to be ready, we sleep for a 'long' |
| 25 # interval. | 23 # interval. |
| 26 LONG_INTERVAL = 60 | 24 SLEEP_INTERVAL = 60 |
| 27 # We should check buildbot not more often than every 10 minutes. | 25 # We should check buildbot not more often than every 10 minutes. |
| 28 BUILDBOT_CHECK_FREQUENCY = 600 | 26 BUILDBOT_CHECK_INTERVAL = 600 |
| 29 # If the 'timeout' interval elapses without any URL becoming ready, we fail. | 27 |
| 30 timeout_interval = 60 * 60 | |
| 31 # Global gsutil path, expected to be set by main. | |
| 32 gsutil_path = '' | |
| 33 next_buildbot_check_due_time = 0 | 28 next_buildbot_check_due_time = 0 |
| 34 | 29 |
| 35 | 30 |
| 36 def _run_gsutil(cmd): | 31 def _print_usage(argv): |
| 37 # Sleep for a short time between gsutil calls | 32 usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]' |
| 38 time.sleep(SHORT_INTERVAL) | 33 print usage % argv[0] |
| 39 cmd = [gsutil_path] + cmd | 34 print 'main.__doc__' |
| 40 try: | 35 print main.__doc__ |
| 41 out = subprocess.check_output(cmd) | 36 return BAD_ARGS |
| 42 return 0, out | 37 |
| 43 except subprocess.CalledProcessError as cpe: | 38 |
| 44 return cpe.returncode, cpe.output | 39 def _gs_file_exists(gsutil_path, url): |
| 45 | |
| 46 | |
| 47 def _gs_file_exists(url): | |
| 48 """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" | 40 """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" |
| 49 return _run_gsutil(['ls', url])[0] == 0 | 41 cmd = [gsutil_path, 'ls', url] |
| 42 error = subprocess.call(cmd, stdout=open(os.devnull, 'wb')) |
| 43 return not error |
| 50 | 44 |
| 51 | 45 |
| 52 def _next_buildbot_check_due(): | 46 def _next_buildbot_check_due(): |
| 47 """To limit how often we pull the [potentially big] json object from bb.""" |
| 53 global next_buildbot_check_due_time | 48 global next_buildbot_check_due_time |
| 54 if time.time() > next_buildbot_check_due_time: | 49 if time.time() > next_buildbot_check_due_time: |
| 55 next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_FREQUENCY | 50 next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_INTERVAL |
| 51 sys.stderr.write('Checking buildbot for completed/failed jobs') |
| 56 return True | 52 return True |
| 57 return False | 53 return False |
| 58 | 54 |
| 59 | 55 |
| 60 def _check_failed_buildbot_jobs(locations): | 56 def _check_buildbot_jobs(jobs_to_check): |
| 61 if not locations: | 57 if not jobs_to_check: |
| 62 return None | 58 return None |
| 63 jobs = {} | 59 jobs = {} |
| 64 for loc in locations: | 60 completed_results = [] |
| 65 _, master, builder, job_name = loc.split(':', 3) | 61 failed_results = [] |
| 62 # Mapping from job names to the original dictionary sent in jobs_to_check |
| 63 entries = {} |
| 64 job_urls = {} |
| 65 for entry in jobs_to_check: |
| 66 master = entry['master'] |
| 67 builder = entry['builder'] |
| 68 job_name = entry['job_name'] |
| 69 # The entries in this list may have multiple jobs for a single builder, and |
| 70 # we want to avoid hitting the builder for each job, since we get the |
| 71 # information for all builds each time. |
| 72 # |
| 73 # To prevent this we are taking this: |
| 74 # [{'master': 'M', 'builder': 'B', 'job_name': 'J1'}, |
| 75 # {'master': 'M', 'builder': 'B', 'job_name': 'J2'}, |
| 76 # {'master': 'M', 'builder': 'C', 'job_name': 'J3'}, |
| 77 # ] |
| 78 # And building this in the jobs variable: |
| 79 # {'M': { 'B': ['J1', 'J2'], 'C': ['J3']}} |
| 66 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) | 80 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) |
| 81 entries[job_name] = entry |
| 67 for master in jobs.keys(): | 82 for master in jobs.keys(): |
| 68 for builder in jobs[master].keys(): | 83 for builder in jobs[master].keys(): |
| 69 if check_buildbot.main(["check_buildbot", master, builder] | 84 config = { |
| 70 + jobs[master][builder]): | 85 'master': master, |
| 71 return 1 | 86 'builder': builder, |
| 72 return 0 | 87 'job_names': jobs[master][builder], |
| 88 } |
| 89 builder_results = check_buildbot.main(config) |
| 90 completed_results += builder_results.get('completed', []) |
| 91 failed_results += builder_results.get('failed', []) |
| 92 job_urls.update(builder_results.get('job_urls', {})) |
| 93 results = {} |
| 94 if completed_results: |
| 95 results['completed'] = [entries[k] for k in completed_results] |
| 96 if failed_results: |
| 97 results['failed'] = [entries[k] for k in failed_results] |
| 98 for job in results.get('failed', []) + results.get('completed', []): |
| 99 if job['job_name'] in job_urls: |
| 100 job['job_url'] = job_urls[job['job_name']] |
| 101 |
| 102 return results |
| 73 | 103 |
| 74 | 104 |
| 75 def main(argv): | 105 def main(argv): |
| 76 global timeout_interval | 106 """Main function of the script. |
| 107 |
| 108 The script expects the path to gsutil to be provided on the command line, and |
| 109 a json object containing the details of the jobs to monitor on standard input. |
| 110 |
| 111 Each job in the list should be one of the following types: |
| 112 - GS location, which must at least contain: |
| 113 - The "type" key set to the "gs" value. |
| 114 - The "location" key, containing the location ("gs://...") of the gs |
| 115 object to check. |
| 116 - Buildbot job, which must at least contain: |
| 117 - The "type" key set to the "buildbot" value. |
| 118 - The "master" key containing the name of the appropriate master, e.g. |
| 119 "tryserver.chromium.perf". |
| 120 - The "builder" key set to the name of the builder performing the job. |
| 121 - The "job_name" key containing the name of the job to check. i.e. |
| 122 typically a uuid or a hash will be used. |
| 123 |
| 124 The script will wait until the first of the following conditions becomes true: |
| 125 - An object exists at one of the GS locations |
| 126 - One of the buildbot jobs completes as successful |
| 127 - One of the buildbot jobs fails |
| 128 - Six days elapse from the invocation of the script. (The exact timeout may |
| 129 be overridden from the command line) |
| 130 |
| 131 The return code will be: |
| 132 0 if a buildbot job succeeds or an object exists at the GS locations. |
| 133 1 if a buildbot job fails |
| 134 2 if the timeout (six days by default) is triggered. |
| 135 |
| 136 Additionally, a json object will be written to standard output containing the |
| 137 results of the script. |
| 138 |
| 139 Example of expected stdin: |
| 140 { |
| 141 "jobs": [ |
| 142 { |
| 143 "type": "gs", |
| 144 "location": "gs://chrome-perf/some_path/some_object.json" |
| 145 }, |
| 146 { |
| 147 "type": "buildbot", |
| 148 "master": "tryserver.chromium.perf", |
| 149 "builder": "linux_perf_bisect", |
| 150 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" |
| 151 } |
| 152 ] |
| 153 } |
| 154 EOF |
| 155 |
| 156 Examples of results from stdout: |
| 157 cat <<EOF #Successful result |
| 158 { |
| 159 "completed": [ |
| 160 { |
| 161 "type": "buildbot", |
| 162 "master": "tryserver.chromium.perf", |
| 163 "builder": "linux_perf_bisect", |
| 164 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" |
| 165 } |
| 166 ] |
| 167 } |
| 168 EOF |
| 169 |
| 170 cat <<EOF #Unsuccessful result |
| 171 { |
| 172 "failed": [ |
| 173 { |
| 174 "type": "buildbot", |
| 175 "master": "tryserver.chromium.perf", |
| 176 "builder": "linux_perf_bisect", |
| 177 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" |
| 178 } |
| 179 ] |
| 180 } |
| 181 EOF |
| 182 """ |
| 183 start_time = time.time() |
| 184 # Default timeout: six days |
| 185 timeout_interval = 6 * 24 * 60 * 60 |
| 77 if argv[-1].startswith('--timeout='): | 186 if argv[-1].startswith('--timeout='): |
| 78 timeout_interval = int(argv[-1].split('=')[1]) | 187 timeout_interval = int(argv[-1].split('=')[1]) |
| 79 argv = argv[:-1] | 188 argv = argv[:-1] |
| 80 | 189 |
| 81 if len(argv) < 3: | 190 jobs = json.loads(sys.stdin.read())['jobs'] |
| 82 usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]' | 191 gs_jobs = [job for job in jobs if job['type'] == 'gs'] |
| 83 ' [--timeout=<seconds>]\n' | 192 buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot'] |
| 84 ' Where urls are either a google storage location for the result ' | 193 |
| 85 ' file, or a buildbot location of the form ' | 194 if ((not gs_jobs and not buildbot_jobs) or |
| 86 '"bb:<master>:<builderi>:<job_name>".') | 195 (gs_jobs and len(argv) < 2)): |
| 87 print usage % argv[0] | 196 return _print_usage(argv) |
| 88 return 1 | 197 |
| 89 | 198 gsutil_path = argv[1] if gs_jobs else '' |
| 90 list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]]) | 199 |
| 91 print 'Waiting for the following urls: ' + list_of_urls | 200 while time.time() < start_time + timeout_interval: |
| 92 global gsutil_path | 201 # Checking GS jobs |
| 93 start_time = time.time() | 202 completed_jobs = [] |
| 94 gsutil_path = argv[1] | 203 for job in gs_jobs: |
| 95 urls = argv[2:] | 204 if _gs_file_exists(gsutil_path, job['location']): |
| 96 while urls: | 205 completed_jobs.append(job) |
| 97 for url in urls: | 206 |
| 98 if url.startswith('bb:'): | 207 # Checking Buildbot jobs |
| 99 pass | 208 if completed_jobs or _next_buildbot_check_due(): |
| 100 elif _gs_file_exists(url): | 209 # buildbot_results will only contain jobs that have been completed or |
| 101 print 'Build finished: ', url | 210 # failed. All other jobs (scheduled, in progress, etc.) will be ignored. |
| 102 return 0 | 211 buildbot_results = _check_buildbot_jobs(buildbot_jobs) |
| 103 if time.time() - start_time > timeout_interval: | 212 if buildbot_results: |
| 104 print "Timed out waiting for: ", urls | 213 print json.dumps(buildbot_results) |
| 105 return 1 | 214 if 'completed' in buildbot_results and buildbot_results['completed']: |
| 106 if _next_buildbot_check_due(): | 215 return COMPLETED |
| 107 failed_job = _check_failed_buildbot_jobs( | 216 return FAILED |
| 108 [url for url in urls if url.startswith('bb:')]) | 217 |
| 109 if failed_job: | 218 if completed_jobs: |
| 110 return 0 | 219 # This clause is just a fallback. Ideally when a results file shows up at |
| 111 time.sleep(LONG_INTERVAL) | 220 # a gs location, we'd want to run check_buildbot jobs first to find the |
| 113 | 221 # url to the job details. |
| 113 | 222 print json.dumps({'completed': completed_jobs}) |
| 114 print "No jobs to check." | 223 return COMPLETED |
| 115 return 0 | 224 # At this point, no job has completed or failed. We log to stderr to |
| 116 | 225 # prevent buildbot from killing this process for inactivity. |
| 226 sys.stderr.write('Sleeping.\n') |
| 227 sys.stderr.flush() |
| 228 time.sleep(SLEEP_INTERVAL) |
| 229 return TIMED_OUT |
| 117 | 230 |
| 118 if __name__ == '__main__': | 231 if __name__ == '__main__': |
| 119 sys.exit(main(sys.argv)) | 232 sys.exit(main(sys.argv)) |
| OLD | NEW |