Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # | |
| 3 # Copyright 2015 The Chromium Authors. All rights reserved. | 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 6 | 5 |
| 7 """Waits for any one job out of a list to complete or a default timeout.""" | 6 """Waits for any one job out of a list to complete or a default timeout.""" |
| 8 | 7 |
| 9 import json | 8 import json |
| 9 import os | |
| 10 import subprocess | 10 import subprocess |
| 11 import sys | 11 import sys |
| 12 import time | 12 import time |
| 13 import urllib2 | |
| 14 | 13 |
| 15 import check_buildbot | 14 import check_buildbot |
| 16 | 15 |
| 16 # Return codes. Note that at this time COMPLETED == FAILED. | |
| 17 COMPLETED, FAILED, TIMED_OUT, NOT_EVEN_STARTED = 0, 0, 2, 3 | |
|
prasadv
2015/09/17 22:33:35
COMPLETED == FAILED, shouldn't we mark the failed
RobertoCN
2015/09/19 00:32:54
Completed now != failed.
NOT_EVEN_STARTED, means t
| |
| 17 | 18 |
| 18 # The following intervals are specified in seconds, are expected to be sent as | 19 # The following intervals are specified in seconds, are expected to be sent as |
| 19 # arguments to time.sleep() | 20 # arguments to time.sleep() |
| 20 # All URLs are checked in sequence separated by 'short' interval seconds, to | 21 |
| 21 # prevent possibly getting throttled by whatever endpoint gsutil or urllib are | |
| 22 # hitting. | |
| 23 SHORT_INTERVAL = 0.4 | |
| 24 # If none of the URLs is determined to be ready, we sleep for a 'long' | 22 # If none of the URLs is determined to be ready, we sleep for a 'long' |
| 25 # interval. | 23 # interval. |
| 26 LONG_INTERVAL = 60 | 24 SLEEP_INTERVAL = 60 |
| 27 # We should check buildbot not more often than every 10 minutes. | 25 # We should check buildbot not more often than every 10 minutes. |
| 28 BUILDBOT_CHECK_FREQUENCY = 600 | 26 BUILDBOT_CHECK_INTERVAL = 600 |
| 29 # If the 'timeout' interval elapses without any URL becoming ready, we fail. | 27 |
| 30 timeout_interval = 60 * 60 | |
| 31 # Global gsutil path, expected to be set by main. | |
| 32 gsutil_path = '' | |
| 33 next_buildbot_check_due_time = 0 | 28 next_buildbot_check_due_time = 0 |
| 34 | 29 |
| 35 | 30 |
| 36 def _run_gsutil(cmd): | 31 def _print_usage(argv): |
| 37 # Sleep for a short time between gsutil calls | 32 usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]' |
| 38 time.sleep(SHORT_INTERVAL) | 33 print usage % argv[0] |
| 39 cmd = [gsutil_path] + cmd | 34 print 'main.__doc__' |
| 40 try: | 35 print main.__doc__ |
| 41 out = subprocess.check_output(cmd) | 36 return NOT_EVEN_STARTED |
| 42 return 0, out | 37 |
| 43 except subprocess.CalledProcessError as cpe: | 38 |
| 44 return cpe.returncode, cpe.output | 39 def _gs_file_exists(gsutil_path, url): |
| 45 | |
| 46 | |
| 47 def _gs_file_exists(url): | |
| 48 """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" | 40 """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" |
| 49 return _run_gsutil(['ls', url])[0] == 0 | 41 cmd = [gsutil_path, 'ls', url] |
| 42 error = subprocess.call(cmd, stderr=open(os.devnull, 'w')) | |
| 43 return not error | |
| 50 | 44 |
| 51 | 45 |
| 52 def _next_buildbot_check_due(): | 46 def _next_buildbot_check_due(): |
| 47 """To limit how often we pull the [potentially big] json object from bb.""" | |
| 53 global next_buildbot_check_due_time | 48 global next_buildbot_check_due_time |
| 54 if time.time() > next_buildbot_check_due_time: | 49 if time.time() > next_buildbot_check_due_time: |
| 55 next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_FREQUENCY | 50 next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_INTERVAL |
| 56 return True | 51 return True |
| 57 return False | 52 return False |
| 58 | 53 |
| 59 | 54 |
| 60 def _check_failed_buildbot_jobs(locations): | 55 def _check_buildbot_jobs(jobs_to_check): |
| 61 if not locations: | 56 if not jobs_to_check: |
| 62 return None | 57 return None |
| 63 jobs = {} | 58 jobs = {} |
| 64 for loc in locations: | 59 completed_results = [] |
| 65 _, master, builder, job_name = loc.split(':', 3) | 60 failed_results = [] |
| 61 # Mapping from job names to the original dictionary sent in jobs_to_check | |
| 62 entries = {} | |
| 63 for entry in jobs_to_check: | |
| 64 master = entry['master'] | |
| 65 builder = entry['builder'] | |
| 66 job_name = entry['job_name'] | |
| 67 # Building a nested dictionary so that we check at most once per builder. | |
|
prasadv
2015/09/17 22:33:35
Maybe we should elaborate these comments.
RobertoCN
2015/09/19 00:32:54
Done.
| |
| 66 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) | 68 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) |
| 69 entries[job_name] = entry | |
| 67 for master in jobs.keys(): | 70 for master in jobs.keys(): |
| 68 for builder in jobs[master].keys(): | 71 for builder in jobs[master].keys(): |
| 69 if check_buildbot.main(["check_buildbot", master, builder] | 72 config = { |
| 70 + jobs[master][builder]): | 73 'master': master, |
| 71 return 1 | 74 'builder': builder, |
| 72 return 0 | 75 'job_names': jobs[master][builder], |
| 76 } | |
| 77 builder_results = check_buildbot.main(config) | |
| 78 completed_results += builder_results.get('completed', []) | |
| 79 failed_results += builder_results.get('failed', []) | |
| 80 results = {} | |
| 81 if completed_results: | |
| 82 results['completed'] = [entries[k] for k in completed_results] | |
| 83 if failed_results: | |
| 84 results['failed'] = [entries[k] for k in failed_results] | |
| 85 return results | |
| 73 | 86 |
| 74 | 87 |
| 75 def main(argv): | 88 def main(argv): |
| 76 global timeout_interval | 89 """Main function of the script. |
| 90 | |
| 91 The script expects the path to gsutil to be provided on the command line, and | |
| 92 a json object containing the details of the jobs to monitor on standard input. | |
| 93 | |
| 94 Each job in the list should be one of the following types: | |
| 95 - GS location, which must at least contain: | |
| 96 - The "type" key set to the "gs" value. | |
| 97 - The "location" key, containing the location ("gs://...") of the gs | |
| 98 object to check. | |
| 99 - Buildbot job, which must at least contain: | |
| 100 - The "type" key set to the "buildbot" value. | |
| 101 - The "master" key containing the name of the appropriate master, e.g. | |
| 102 "chromium.perf". | |
|
prasadv
2015/09/17 22:33:35
Maybe tryserver.chromium.perf would be more relevant
RobertoCN
2015/09/19 00:32:54
Done.
| |
| 103 - The "builder" key set to the name of the builder performing the job. | |
| 104 - The "job_name" key containing the name of the job to check | |
| 105 (typically a uuid or a hash). | |
| 106 | |
| 107 The script will wait until the first of the following conditions becomes true: | |
| 108 - An object exists at one of the GS locations | |
| 109 - One of the buildbot jobs completes successfully | |
| 110 - One of the buildbot jobs fails | |
| 111 - Six days elapse from the invocation of the script. (The exact timeout may | |
| 112 be overridden from the command line) | |
| 113 | |
| 114 The return code will be: | |
| 115 0 if a buildbot job succeeds or an object exists at the GS locations. | |
| 116 1 if a buildbot job fails | |
| 117 2 if the six-day timeout is triggered. | |
|
prasadv
2015/09/17 22:33:35
What if the case where buildbot job succeeds but t
RobertoCN
2015/09/19 00:32:54
In that case it will still return 0. We won't wait
| |
| 118 | |
| 119 Additionally, a json object will be written to standard output containing the | |
| 120 results of the script. | |
| 121 | |
| 122 Example of expected stdin: | |
| 123 { | |
| 124 "jobs": [ | |
| 125 { | |
| 126 "type": "gs", | |
| 127 "location": "gs://chrome-perf/some_path/some_object.json" | |
| 128 }, | |
| 129 { | |
| 130 "type": "buildbot", | |
| 131 "master": "tryserver.chromium.perf", | |
| 132 "builder": "linux_perf_bisect", | |
| 133 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" | |
| 134 } | |
| 135 ] | |
| 136 } | |
| 137 EOF | |
| 138 | |
| 139 Examples of results from stdout: | |
| 140 cat <<EOF #Successful result | |
| 141 { | |
| 142 "completed": [ | |
| 143 { | |
| 144 "type": "buildbot", | |
| 145 "master": "tryserver.chromium.perf", | |
| 146 "builder": "linux_perf_bisect", | |
| 147 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" | |
| 148 } | |
| 149 ] | |
| 150 } | |
| 151 EOF | |
| 152 | |
| 153 cat <<EOF #Unsuccessful result | |
| 154 { | |
| 155 "failed": [ | |
| 156 { | |
| 157 "type": "buildbot", | |
| 158 "master": "tryserver.chromium.perf", | |
| 159 "builder": "linux_perf_bisect", | |
| 160 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" | |
| 161 } | |
| 162 ] | |
| 163 } | |
| 164 EOF | |
| 165 """ | |
| 166 start_time = time.time() | |
| 167 # Default timeout: six days | |
| 168 timeout_interval = 6 * 24 * 60 * 60 | |
| 77 if argv[-1].startswith('--timeout='): | 169 if argv[-1].startswith('--timeout='): |
| 78 timeout_interval = int(argv[-1].split('=')[1]) | 170 timeout_interval = int(argv[-1].split('=')[1]) |
| 79 argv = argv[:-1] | 171 argv = argv[:-1] |
| 80 | 172 |
| 81 if len(argv) < 3: | 173 jobs = json.loads(sys.stdin.read())['jobs'] |
| 82 usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]' | 174 gs_jobs = [job for job in jobs if job['type'] == 'gs'] |
| 83 ' [--timeout=<seconds>]\n' | 175 buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot'] |
| 84 ' Where urls are either a google storage location for the result ' | 176 |
| 85 ' file, or a buildbot location of the form ' | 177 if ((not gs_jobs and not buildbot_jobs) or |
| 86 '"bb:<master>:<builderi>:<job_name>".') | 178 (gs_jobs and len(argv) < 2)): |
| 87 print usage % argv[0] | 179 return _print_usage(argv) |
| 88 return 1 | 180 |
| 89 | 181 gsutil_path = argv[1] if gs_jobs else '' |
| 90 list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]]) | 182 |
| 91 print 'Waiting for the following urls: ' + list_of_urls | 183 while time.time() < start_time + timeout_interval: |
| 92 global gsutil_path | 184 # Checking GS jobs |
| 93 start_time = time.time() | 185 completed_jobs = [] |
| 94 gsutil_path = argv[1] | 186 for job in gs_jobs: |
| 95 urls = argv[2:] | 187 if _gs_file_exists(gsutil_path, job['location']): |
| 96 while urls: | 188 completed_jobs.append(job) |
| 97 for url in urls: | 189 if completed_jobs: |
| 98 if url.startswith('bb:'): | 190 print json.dumps({'completed': completed_jobs}) |
| 99 pass | 191 return COMPLETED |
| 100 elif _gs_file_exists(url): | 192 |
| 101 print 'Build finished: ', url | 193 # Checking Buildbot jobs |
| 102 return 0 | |
| 103 if time.time() - start_time > timeout_interval: | |
| 104 print "Timed out waiting for: ", urls | |
| 105 return 1 | |
| 106 if _next_buildbot_check_due(): | 194 if _next_buildbot_check_due(): |
| 107 failed_job = _check_failed_buildbot_jobs( | 195 buildbot_results = _check_buildbot_jobs(buildbot_jobs) |
| 108 [url for url in urls if url.startswith('bb:')]) | 196 if buildbot_results: |
| 109 if failed_job: | 197 print json.dumps(buildbot_results) |
| 110 return 0 | 198 if 'completed' in buildbot_results and buildbot_results['completed']: |
| 111 time.sleep(LONG_INTERVAL) | 199 return COMPLETED |
| 112 | 200 return FAILED |
|
prasadv
2015/09/17 22:33:35
How are we handling pending jobs, I mean jobs in q
RobertoCN
2015/09/19 00:32:54
They are considered in progress, as they are neith
| |
| 113 | 201 |
| 114 print "No jobs to check." | 202 sys.stdout.write('.') |
| 115 return 0 | 203 sys.stdout.flush() |
| 116 | 204 time.sleep(SLEEP_INTERVAL) |
| 205 return TIMED_OUT | |
| 117 | 206 |
| 118 if __name__ == '__main__': | 207 if __name__ == '__main__': |
| 119 sys.exit(main(sys.argv)) | 208 sys.exit(main(sys.argv)) |
| OLD | NEW |