Chromium Code Reviews| Index: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py |
| diff --git a/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py b/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py |
| index 27be07da84d6987d9161c656541e9f039f33a708..ad8d65fd4beb55224e8d4764dd03295199455893 100755 |
| --- a/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py |
| +++ b/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py |
| @@ -1,5 +1,4 @@ |
| #!/usr/bin/python |
| -# |
| # Copyright 2015 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| @@ -7,113 +6,203 @@ |
| """Waits for any one job out of a list to complete or a default timeout.""" |
| import json |
| +import os |
| import subprocess |
| import sys |
| import time |
| -import urllib2 |
| import check_buildbot |
| +# Return codes. Note that at this time COMPLETED == FAILED. |
| +COMPLETED, FAILED, TIMED_OUT, NOT_EVEN_STARTED = 0, 0, 2, 3 |
|
prasadv
2015/09/17 22:33:35
COMPLETED == FAILED, shouldn't we mark the failed
RobertoCN
2015/09/19 00:32:54
Completed now != failed.
NOT_EVEN_STARTED, means t
|
| # The following intervals are specified in seconds and are expected to be |
| # passed as arguments to time.sleep(). |
| -# All URLs are checked in sequence separated by 'short' interval seconds, to |
| -# prevent possibly getting throttled by whatever endpoint gsutil or urllib are |
| -# hitting. |
| -SHORT_INTERVAL = 0.4 |
| + |
| # If none of the URLs is determined to be ready, we sleep for a 'long' |
| # interval. |
| -LONG_INTERVAL = 60 |
| +SLEEP_INTERVAL = 60 |
| # We should check buildbot not more often than every 10 minutes. |
| -BUILDBOT_CHECK_FREQUENCY = 600 |
| -# If the 'timeout' interval elapses without any URL becoming ready, we fail. |
| -timeout_interval = 60 * 60 |
| -# Global gsutil path, expected to be set by main. |
| -gsutil_path = '' |
| +BUILDBOT_CHECK_INTERVAL = 600 |
| + |
| next_buildbot_check_due_time = 0 |
| -def _run_gsutil(cmd): |
| - # Sleep for a short time between gsutil calls |
| - time.sleep(SHORT_INTERVAL) |
| - cmd = [gsutil_path] + cmd |
| - try: |
| - out = subprocess.check_output(cmd) |
| - return 0, out |
| - except subprocess.CalledProcessError as cpe: |
| - return cpe.returncode, cpe.output |
| +def _print_usage(argv): |
| + usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]' |
| + print usage % argv[0] |
| + print 'main.__doc__' |
| + print main.__doc__ |
| + return NOT_EVEN_STARTED |
| -def _gs_file_exists(url): |
| +def _gs_file_exists(gsutil_path, url): |
| """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" |
| - return _run_gsutil(['ls', url])[0] == 0 |
| + cmd = [gsutil_path, 'ls', url] |
| + error = subprocess.call(cmd, stderr=open(os.devnull, 'w')) |
| + return not error |
| def _next_buildbot_check_due(): |
| + """To limit how often we pull the [potentially big] json object from bb.""" |
| global next_buildbot_check_due_time |
| if time.time() > next_buildbot_check_due_time: |
| - next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_FREQUENCY |
| + next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_INTERVAL |
| return True |
| return False |
| -def _check_failed_buildbot_jobs(locations): |
| - if not locations: |
| +def _check_buildbot_jobs(jobs_to_check): |
| + if not jobs_to_check: |
| return None |
| jobs = {} |
| - for loc in locations: |
| - _, master, builder, job_name = loc.split(':', 3) |
| + completed_results = [] |
| + failed_results = [] |
| + # Mapping from job names to the original dictionary sent in jobs_to_check |
| + entries = {} |
| + for entry in jobs_to_check: |
| + master = entry['master'] |
| + builder = entry['builder'] |
| + job_name = entry['job_name'] |
| + # Building a nested dictionary so that we check at most once per builder. |
|
prasadv
2015/09/17 22:33:35
May be we should elaborate these comments.
RobertoCN
2015/09/19 00:32:54
Done.
|
| jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) |
| + entries[job_name] = entry |
| for master in jobs.keys(): |
| for builder in jobs[master].keys(): |
| - if check_buildbot.main(["check_buildbot", master, builder] |
| - + jobs[master][builder]): |
| - return 1 |
| - return 0 |
| + config = { |
| + 'master': master, |
| + 'builder': builder, |
| + 'job_names': jobs[master][builder], |
| + } |
| + builder_results = check_buildbot.main(config) |
| + completed_results += builder_results.get('completed', []) |
| + failed_results += builder_results.get('failed', []) |
| + results = {} |
| + if completed_results: |
| + results['completed'] = [entries[k] for k in completed_results] |
| + if failed_results: |
| + results['failed'] = [entries[k] for k in failed_results] |
| + return results |
| def main(argv): |
| - global timeout_interval |
| + """Main function of the script. |
| + |
| + The script expects the path to gsutil to be provided on the command line, and |
| + a json object containing the details of the jobs to monitor on standard input. |
| + |
| + Each job in the list should be one of the following types: |
| + - GS location, which must at least contain: |
| + - The "type" key set to the "gs" value. |
| + - The "location" key, containing the location ("gs://...") of the gs |
| + object to check. |
| + - Buildbot job, which must at least contain: |
| + - The "type" key set to the "buildbot" value. |
| + - The "master" key containing the name of the appropriate master, e.g. |
| + "chromium.perf". |
|
prasadv
2015/09/17 22:33:35
May be tryserver.chromium.perf be more relevant
RobertoCN
2015/09/19 00:32:54
Done.
|
| + - The "builder" key set to the name of the builder performing the job. |
| + - The "job_name" key containing the name of the job to check; |
| + typically a uuid or a hash will be used. |
| + |
| + The script will wait until the first of the following conditions becomes true: |
| + - An object exists at one of the GS locations |
| + - One of the buildbot jobs completes as successful |
| + - One of the buildbot jobs fails |
| + - Six days elapse from the invocation of the script. (The exact timeout may |
| + be overridden from the command line with --timeout=<seconds>.) |
| + |
| + The return code will be: |
| + 0 if a buildbot job succeeds or an object exists at the GS locations. |
| + 1 if a buildbot job fails |
| + 2 if the timeout (six days by default) is triggered. |
|
prasadv
2015/09/17 22:33:35
What if the case where buildbot job succeeds but t
RobertoCN
2015/09/19 00:32:54
In that case it will still return 0. We won't wait
|
| + |
| + Additionally, a json object will be written to standard output containing the |
| + results of the script. |
| + |
| + Example of expected stdin: |
| + cat <<EOF |
| + { |
| + "jobs": [ |
| + { |
| + "type": "gs", |
| + "location": "gs://chrome-perf/some_path/some_object.json" |
| + }, |
| + { |
| + "type": "buildbot", |
| + "master": "tryserver.chromium.perf", |
| + "builder": "linux_perf_bisect", |
| + "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" |
| + } |
| + ] |
| + } |
| + EOF |
| + |
| + Examples of results from stdout: |
| + cat <<EOF #Successful result |
| + { |
| + "completed": [ |
| + { |
| + "type": "buildbot", |
| + "master": "tryserver.chromium.perf", |
| + "builder": "linux_perf_bisect", |
| + "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" |
| + } |
| + ] |
| + } |
| + EOF |
| + |
| + cat <<EOF #Unsuccessful result |
| + { |
| + "failed": [ |
| + { |
| + "type": "buildbot", |
| + "master": "tryserver.chromium.perf", |
| + "builder": "linux_perf_bisect", |
| + "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06" |
| + } |
| + ] |
| + } |
| + EOF |
| + """ |
| + start_time = time.time() |
| + # Default timeout: six days |
| + timeout_interval = 6 * 24 * 60 * 60 |
| if argv[-1].startswith('--timeout='): |
| timeout_interval = int(argv[-1].split('=')[1]) |
| argv = argv[:-1] |
| - if len(argv) < 3: |
| - usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]' |
| - ' [--timeout=<seconds>]\n' |
| - ' Where urls are either a google storage location for the result ' |
| - ' file, or a buildbot location of the form ' |
| - '"bb:<master>:<builderi>:<job_name>".') |
| - print usage % argv[0] |
| - return 1 |
| - |
| - list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]]) |
| - print 'Waiting for the following urls: ' + list_of_urls |
| - global gsutil_path |
| - start_time = time.time() |
| - gsutil_path = argv[1] |
| - urls = argv[2:] |
| - while urls: |
| - for url in urls: |
| - if url.startswith('bb:'): |
| - pass |
| - elif _gs_file_exists(url): |
| - print 'Build finished: ', url |
| - return 0 |
| - if time.time() - start_time > timeout_interval: |
| - print "Timed out waiting for: ", urls |
| - return 1 |
| - if _next_buildbot_check_due(): |
| - failed_job = _check_failed_buildbot_jobs( |
| - [url for url in urls if url.startswith('bb:')]) |
| - if failed_job: |
| - return 0 |
| - time.sleep(LONG_INTERVAL) |
| + jobs = json.loads(sys.stdin.read())['jobs'] |
| + gs_jobs = [job for job in jobs if job['type'] == 'gs'] |
| + buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot'] |
| + |
| + if ((not gs_jobs and not buildbot_jobs) or |
| + (gs_jobs and len(argv) < 2)): |
| + return _print_usage(argv) |
| + gsutil_path = argv[1] if gs_jobs else '' |
| - print "No jobs to check." |
| - return 0 |
| + while time.time() < start_time + timeout_interval: |
| + # Checking GS jobs |
| + completed_jobs = [] |
| + for job in gs_jobs: |
| + if _gs_file_exists(gsutil_path, job['location']): |
| + completed_jobs.append(job) |
| + if completed_jobs: |
| + print json.dumps({'completed': completed_jobs}) |
| + return COMPLETED |
| + # Checking Buildbot jobs |
| + if _next_buildbot_check_due(): |
| + buildbot_results = _check_buildbot_jobs(buildbot_jobs) |
| + if buildbot_results: |
| + print json.dumps(buildbot_results) |
| + if 'completed' in buildbot_results and buildbot_results['completed']: |
| + return COMPLETED |
| + return FAILED |
|
prasadv
2015/09/17 22:33:35
How are we handling pending jobs, I mean jobs in q
RobertoCN
2015/09/19 00:32:54
They are considered in progress, as they are neith
|
| + |
| + sys.stdout.write('.') |
| + sys.stdout.flush() |
| + time.sleep(SLEEP_INTERVAL) |
| + return TIMED_OUT |
| if __name__ == '__main__': |
| sys.exit(main(sys.argv)) |