Chromium Code Reviews

Unified Diff: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py

Issue 1339613005: Refactoring scripts that wait for buildbot jobs to complete. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@hax
Patch Set: removing blank line Created 5 years, 3 months ago
Index: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
diff --git a/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py b/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
index 27be07da84d6987d9161c656541e9f039f33a708..87ffc57c2d794c78924ef5e67589441fa05659a1 100755
--- a/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
+++ b/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
@@ -1,5 +1,4 @@
#!/usr/bin/python
-#
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -7,113 +6,227 @@
"""Waits for any one job out of a list to complete or a default timeout."""
import json
+import os
import subprocess
import sys
import time
-import urllib2
import check_buildbot
+# Return codes.
+COMPLETED, FAILED, TIMED_OUT, BAD_ARGS = 0, 1, 2, 3
# The following intervals are specified in seconds, are expected to be sent as
# arguments to time.sleep()
-# All URLs are checked in sequence separated by 'short' interval seconds, to
-# prevent possibly getting throttled by whatever endpoint gsutil or urllib are
-# hitting.
-SHORT_INTERVAL = 0.4
+
# If none of the URLs is determined to be ready, we sleep for a 'long'
# interval.
-LONG_INTERVAL = 60
+SLEEP_INTERVAL = 60
# We should check buildbot not more often than every 10 minutes.
-BUILDBOT_CHECK_FREQUENCY = 600
-# If the 'timeout' interval elapses without any URL becoming ready, we fail.
-timeout_interval = 60 * 60
-# Global gsutil path, expected to be set by main.
-gsutil_path = ''
+BUILDBOT_CHECK_INTERVAL = 600
+
next_buildbot_check_due_time = 0
-def _run_gsutil(cmd):
-  # Sleep for a short time between gsutil calls
-  time.sleep(SHORT_INTERVAL)
-  cmd = [gsutil_path] + cmd
-  try:
-    out = subprocess.check_output(cmd)
-    return 0, out
-  except subprocess.CalledProcessError as cpe:
-    return cpe.returncode, cpe.output
+def _print_usage(argv):
+  usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]'
+  print usage % argv[0]
+  print main.__doc__
+  return BAD_ARGS
-def _gs_file_exists(url):
+def _gs_file_exists(gsutil_path, url):
  """Checks that running 'gsutil ls' returns 0 to see if file at url exists."""
-  return _run_gsutil(['ls', url])[0] == 0
+  cmd = [gsutil_path, 'ls', url]
+  error = subprocess.call(cmd, stdout=open(os.devnull, 'wb'))
+  return not error
def _next_buildbot_check_due():
+  """To limit how often we pull the [potentially big] json object from bb."""
  global next_buildbot_check_due_time
  if time.time() > next_buildbot_check_due_time:
-    next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_FREQUENCY
+    next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_INTERVAL
+    sys.stderr.write('Checking buildbot for completed/failed jobs\n')
    return True
  return False
-def _check_failed_buildbot_jobs(locations):
-  if not locations:
+def _check_buildbot_jobs(jobs_to_check):
+  if not jobs_to_check:
    return None
  jobs = {}
-  for loc in locations:
-    _, master, builder, job_name = loc.split(':', 3)
+  completed_results = []
+  failed_results = []
+  # Mapping from job names to the original dictionary sent in jobs_to_check
+  entries = {}
+  job_urls = {}
+  for entry in jobs_to_check:
+    master = entry['master']
+    builder = entry['builder']
+    job_name = entry['job_name']
+    # The entries in this list may have multiple jobs for a single builder, and
+    # we want to avoid hitting the builder for each job, since we get the
+    # information for all builds each time.
+    #
+    # To prevent this we are taking this:
+    #   [{'master': 'M', 'builder': 'B', 'job_name': 'J1'},
+    #    {'master': 'M', 'builder': 'B', 'job_name': 'J2'},
+    #    {'master': 'M', 'builder': 'C', 'job_name': 'J3'},
+    #   ]
+    # And building this in the jobs variable:
+    #   {'M': {'B': ['J1', 'J2'], 'C': ['J3']}}
    jobs.setdefault(master, {}).setdefault(builder, []).append(job_name)
+    entries[job_name] = entry
  for master in jobs.keys():
    for builder in jobs[master].keys():
-      if check_buildbot.main(["check_buildbot", master, builder]
-                             + jobs[master][builder]):
-        return 1
-  return 0
+      config = {
+          'master': master,
+          'builder': builder,
+          'job_names': jobs[master][builder],
+      }
+      builder_results = check_buildbot.main(config)
+      completed_results += builder_results.get('completed', [])
+      failed_results += builder_results.get('failed', [])
+      job_urls.update(builder_results.get('job_urls', {}))
+  results = {}
+  if completed_results:
+    results['completed'] = [entries[k] for k in completed_results]
+  if failed_results:
+    results['failed'] = [entries[k] for k in failed_results]
+  for job in results.get('failed', []) + results.get('completed', []):
+    if job['job_name'] in job_urls:
+      job['job_url'] = job_urls[job['job_name']]
+
+  return results
def main(argv):
-  global timeout_interval
+  """Main function of the script.
+
+  The script expects the path to gsutil to be provided on the command line, and
+  a json object containing the details of the jobs to monitor on standard input.
+
+  Each job in the list should be one of the following types:
+  - GS location, which must at least contain:
+    - The "type" key set to the "gs" value.
+    - The "location" key, containing the location ("gs://...") of the GS
+      object to check.
+  - Buildbot job, which must at least contain:
+    - The "type" key set to the "buildbot" value.
+    - The "master" key containing the name of the appropriate master, e.g.
+      "tryserver.chromium.perf".
+    - The "builder" key set to the name of the builder performing the job.
+    - The "job_name" key containing the name of the job to check, e.g.
+      typically a UUID or a hash.
+
+  The script will wait until the first of the following conditions becomes true:
+  - An object exists at one of the GS locations.
+  - One of the buildbot jobs completes as successful.
+  - One of the buildbot jobs fails.
+  - Six days elapse from the invocation of the script. (The exact timeout may
+    be overridden from the command line.)
+
+  The return code will be:
+  0 if a buildbot job succeeds or an object exists at one of the GS locations.
+  1 if a buildbot job fails.
+  2 if the timeout is triggered.
+
+  Additionally, a json object will be written to standard output containing the
+  results of the script.
+
+  Example of expected stdin:
+  cat <<EOF
+  {
+    "jobs": [
+      {
+        "type": "gs",
+        "location": "gs://chrome-perf/some_path/some_object.json"
+      },
+      {
+        "type": "buildbot",
+        "master": "tryserver.chromium.perf",
+        "builder": "linux_perf_bisect",
+        "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
+      }
+    ]
+  }
+  EOF
+
+  Examples of results from stdout:
+  cat <<EOF  # Successful result
+  {
+    "completed": [
+      {
+        "type": "buildbot",
+        "master": "tryserver.chromium.perf",
+        "builder": "linux_perf_bisect",
+        "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
+      }
+    ]
+  }
+  EOF
+
+  cat <<EOF  # Unsuccessful result
+  {
+    "failed": [
+      {
+        "type": "buildbot",
+        "master": "tryserver.chromium.perf",
+        "builder": "linux_perf_bisect",
+        "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
+      }
+    ]
+  }
+  EOF
+  """
+  start_time = time.time()
+  # Default timeout: six days.
+  timeout_interval = 6 * 24 * 60 * 60
  if argv[-1].startswith('--timeout='):
    timeout_interval = int(argv[-1].split('=')[1])
    argv = argv[:-1]
-  if len(argv) < 3:
-    usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]'
-             ' [--timeout=<seconds>]\n'
-             ' Where urls are either a google storage location for the result '
-             ' file, or a buildbot location of the form '
-             '"bb:<master>:<builderi>:<job_name>".')
-    print usage % argv[0]
-    return 1
-
-  list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]])
-  print 'Waiting for the following urls: ' + list_of_urls
-  global gsutil_path
-  start_time = time.time()
-  gsutil_path = argv[1]
-  urls = argv[2:]
-  while urls:
-    for url in urls:
-      if url.startswith('bb:'):
-        pass
-      elif _gs_file_exists(url):
-        print 'Build finished: ', url
-        return 0
-    if time.time() - start_time > timeout_interval:
-      print "Timed out waiting for: ", urls
-      return 1
-    if _next_buildbot_check_due():
-      failed_job = _check_failed_buildbot_jobs(
-          [url for url in urls if url.startswith('bb:')])
-      if failed_job:
-        return 0
-    time.sleep(LONG_INTERVAL)
-
-
-  print "No jobs to check."
-  return 0
-
+  jobs = json.loads(sys.stdin.read())['jobs']
+  gs_jobs = [job for job in jobs if job['type'] == 'gs']
+  buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot']
+
+  if ((not gs_jobs and not buildbot_jobs) or
+      (gs_jobs and len(argv) < 2)):
+    return _print_usage(argv)
+
+  gsutil_path = argv[1] if gs_jobs else ''
+
+  while time.time() < start_time + timeout_interval:
+    # Checking GS jobs.
+    completed_jobs = []
+    for job in gs_jobs:
+      if _gs_file_exists(gsutil_path, job['location']):
+        completed_jobs.append(job)
+
+    # Checking buildbot jobs.
+    if completed_jobs or _next_buildbot_check_due():
+      # buildbot_results will only contain jobs that have been completed or
+      # failed. All other jobs (scheduled, in progress, etc.) will be ignored.
+      buildbot_results = _check_buildbot_jobs(buildbot_jobs)
+      if buildbot_results:
+        print json.dumps(buildbot_results)
+        if 'completed' in buildbot_results and buildbot_results['completed']:
+          return COMPLETED
+        return FAILED
+
+    if completed_jobs:
+      # This clause is just a fallback. Ideally, when a results file shows up
+      # at a gs location, we'd want to check the buildbot jobs first to find
+      # the url to the job details.
+      print json.dumps({'completed': completed_jobs})
+      return COMPLETED
+    # At this point, no jobs have completed or failed. We write a short message
+    # to stderr to prevent buildbot from killing this process for inactivity.
+    sys.stderr.write('Sleeping.\n')
+    sys.stderr.flush()
+    time.sleep(SLEEP_INTERVAL)
+  return TIMED_OUT
if __name__ == '__main__':
  sys.exit(main(sys.argv))
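
Note on usage: the docstring in this patch defines the new stdin/stdout contract and the return codes. The snippet below is a minimal sketch, not part of the patch, of how a caller might drive the script; the gsutil path, timeout value, and job fields are placeholders lifted from the docstring examples.

import json
import subprocess

# Return codes, mirroring the constants at the top of wait_for_any.py.
COMPLETED, FAILED, TIMED_OUT, BAD_ARGS = 0, 1, 2, 3

# Jobs payload in the stdin format described by main()'s docstring.
payload = json.dumps({
    'jobs': [
        {'type': 'gs',
         'location': 'gs://chrome-perf/some_path/some_object.json'},
        {'type': 'buildbot',
         'master': 'tryserver.chromium.perf',
         'builder': 'linux_perf_bisect',
         'job_name': 'f74fb8e0418d47bfb7d01fad0dd4df06'},
    ]
})

# argv[1] is the gsutil path; --timeout (seconds) overrides the six-day
# default and must be the last argument.
proc = subprocess.Popen(
    ['python', 'wait_for_any.py', '/path/to/gsutil', '--timeout=3600'],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE)
stdout, _ = proc.communicate(payload)

if proc.returncode in (COMPLETED, FAILED):
  # stdout carries a json object with 'completed' and/or 'failed' job lists.
  results = json.loads(stdout)
elif proc.returncode == TIMED_OUT:
  results = None  # Nothing became ready before the timeout elapsed.

From the way _check_buildbot_jobs consumes its result, check_buildbot.main(config) is assumed to return a dict with optional 'completed' and 'failed' lists of job names plus a 'job_urls' mapping; the jobs echoed on stdout are the input job dicts, augmented with a 'job_url' key when one is known.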
