Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(756)

Unified Diff: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py

Issue 1339613005: Refactoring scripts that wait for buildbot jobs to complete. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@hax
Patch Set: . Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
diff --git a/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py b/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
index 27be07da84d6987d9161c656541e9f039f33a708..ad8d65fd4beb55224e8d4764dd03295199455893 100755
--- a/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
+++ b/scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py
@@ -1,5 +1,4 @@
#!/usr/bin/python
-#
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -7,113 +6,203 @@
"""Waits for any one job out of a list to complete or a default timeout."""
import json
+import os
import subprocess
import sys
import time
-import urllib2
import check_buildbot
+# Return codes. Note that at this time COMPLETED == FAILED.
+COMPLETED, FAILED, TIMED_OUT, NOT_EVEN_STARTED = 0, 0, 2, 3
prasadv 2015/09/17 22:33:35 COMPLETED == FAILED, shouldn't we mark the failed
RobertoCN 2015/09/19 00:32:54 Completed now != failed. NOT_EVEN_STARTED, means t
# The following intervals are specified in seconds, are expected to be sent as
# arguments to time.sleep()
-# All URLs are checked in sequence separated by 'short' interval seconds, to
-# prevent possibly getting throttled by whatever endpoint gsutil or urllib are
-# hitting.
-SHORT_INTERVAL = 0.4
+
# If none of the URLs is determined to be ready, we sleep for a 'long'
# interval.
-LONG_INTERVAL = 60
+SLEEP_INTERVAL = 60
# We should check buildbot not more often than every 10 minutes.
-BUILDBOT_CHECK_FREQUENCY = 600
-# If the 'timeout' interval elapses without any URL becoming ready, we fail.
-timeout_interval = 60 * 60
-# Global gsutil path, expected to be set by main.
-gsutil_path = ''
+BUILDBOT_CHECK_INTERVAL = 600
+
next_buildbot_check_due_time = 0
-def _run_gsutil(cmd):
- # Sleep for a short time between gsutil calls
- time.sleep(SHORT_INTERVAL)
- cmd = [gsutil_path] + cmd
- try:
- out = subprocess.check_output(cmd)
- return 0, out
- except subprocess.CalledProcessError as cpe:
- return cpe.returncode, cpe.output
+def _print_usage(argv):
+ usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]'
+ print usage % argv[0]
+  print main.__doc__
+ return NOT_EVEN_STARTED
-def _gs_file_exists(url):
+def _gs_file_exists(gsutil_path, url):
"""Checks that running 'gsutil ls' returns 0 to see if file at url exists."""
- return _run_gsutil(['ls', url])[0] == 0
+ cmd = [gsutil_path, 'ls', url]
+ error = subprocess.call(cmd, stderr=open(os.devnull, 'w'))
+ return not error
def _next_buildbot_check_due():
+ """To limit how often we pull the [potentially big] json object from bb."""
global next_buildbot_check_due_time
if time.time() > next_buildbot_check_due_time:
- next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_FREQUENCY
+ next_buildbot_check_due_time = time.time() + BUILDBOT_CHECK_INTERVAL
return True
return False
-def _check_failed_buildbot_jobs(locations):
- if not locations:
+def _check_buildbot_jobs(jobs_to_check):
+ if not jobs_to_check:
return None
jobs = {}
- for loc in locations:
- _, master, builder, job_name = loc.split(':', 3)
+ completed_results = []
+ failed_results = []
+ # Mapping from job names to the original dictionary sent in jobs_to_check
+ entries = {}
+ for entry in jobs_to_check:
+ master = entry['master']
+ builder = entry['builder']
+ job_name = entry['job_name']
+ # Building a nested dictionary so that we check at most once per builder.
prasadv 2015/09/17 22:33:35 May be we should elaborate these comments.
RobertoCN 2015/09/19 00:32:54 Done.
jobs.setdefault(master, {}).setdefault(builder, []).append(job_name)
+ entries[job_name] = entry
for master in jobs.keys():
for builder in jobs[master].keys():
- if check_buildbot.main(["check_buildbot", master, builder]
- + jobs[master][builder]):
- return 1
- return 0
+ config = {
+ 'master': master,
+ 'builder': builder,
+ 'job_names': jobs[master][builder],
+ }
+ builder_results = check_buildbot.main(config)
+ completed_results += builder_results.get('completed', [])
+ failed_results += builder_results.get('failed', [])
+ results = {}
+ if completed_results:
+ results['completed'] = [entries[k] for k in completed_results]
+ if failed_results:
+ results['failed'] = [entries[k] for k in failed_results]
+ return results
def main(argv):
- global timeout_interval
+ """Main function of the script.
+
+ The script expects the path to gsutil to be provided on the command line, and
+ a json object containing the details of the jobs to monitor on standard input.
+
+ Each job in the list, should be one of the following types:
+ - GS location, which must at least contain:
+ - The "type" key set to the "gs" value.
+ - The "location" key, containing the location ("gs://...") of the gs
+ object to check.
+ - Buildbot job, which must at least contain:
+ - The "type" key set to the "buildbot" value.
+ - The "master" key containing the name of the appropriate master, e.g.
+ "chromium.perf".
prasadv 2015/09/17 22:33:35 May be tryserver.chromium.perf be more relevant
RobertoCN 2015/09/19 00:32:54 Done.
+ - The "builder" key set to the name of the builder performing the job.
+ - The "job_name" key containing the name of the job to check. i.e.
+ typically a uuid or a hash will be used.
+
+ The script will wait until the first of the following conditions becomes true:
+ - An object exists at one of the GS locations
+  - One of the buildbot jobs completes as successful
+ - One of the buildbot jobs fails
+ - One week elapses from the invocation of the script. (The exact timeout may
+  be overridden from the command line)
+
+ The return code will be:
+ 0 if a buildbot job succeeds or an object exists at the GS locations.
+ 1 if a buildbot job fails
+ 2 if the one-week timeout is triggered.
prasadv 2015/09/17 22:33:35 What if the case where buildbot job succeeds but t
RobertoCN 2015/09/19 00:32:54 In that case it will still return 0. We won't wait
+
+  Additionally, a json object will be written to standard output containing the
+ results of the script.
+
+ Example of expected stdin:
+ {
+ "jobs": [
+ {
+ "type": "gs",
+ "location": "gs://chrome-perf/some_path/some_object.json"
+ },
+ {
+ "type": "buildbot",
+ "master": "tryserver.chromium.perf",
+ "builder": "linux_perf_bisect",
+ "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
+ }
+ ]
+ }
+ EOF
+
+ Examples of results from stdout:
+ cat <<EOF #Successful result
+ {
+ "completed": [
+ {
+ "type": "buildbot",
+ "master": "tryserver.chromium.perf",
+ "builder": "linux_perf_bisect",
+ "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
+ }
+ ]
+ }
+ EOF
+
+ cat <<EOF #Unsuccessful result
+ {
+ "failed": [
+ {
+ "type": "buildbot",
+ "master": "tryserver.chromium.perf",
+ "builder": "linux_perf_bisect",
+ "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
+ }
+ ]
+ }
+ EOF
+ """
+ start_time = time.time()
+ # Default timeout: six days
+ timeout_interval = 6 * 24 * 60 * 60
if argv[-1].startswith('--timeout='):
timeout_interval = int(argv[-1].split('=')[1])
argv = argv[:-1]
- if len(argv) < 3:
- usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]'
- ' [--timeout=<seconds>]\n'
- ' Where urls are either a google storage location for the result '
- ' file, or a buildbot location of the form '
- '"bb:<master>:<builderi>:<job_name>".')
- print usage % argv[0]
- return 1
-
- list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]])
- print 'Waiting for the following urls: ' + list_of_urls
- global gsutil_path
- start_time = time.time()
- gsutil_path = argv[1]
- urls = argv[2:]
- while urls:
- for url in urls:
- if url.startswith('bb:'):
- pass
- elif _gs_file_exists(url):
- print 'Build finished: ', url
- return 0
- if time.time() - start_time > timeout_interval:
- print "Timed out waiting for: ", urls
- return 1
- if _next_buildbot_check_due():
- failed_job = _check_failed_buildbot_jobs(
- [url for url in urls if url.startswith('bb:')])
- if failed_job:
- return 0
- time.sleep(LONG_INTERVAL)
+ jobs = json.loads(sys.stdin.read())['jobs']
+ gs_jobs = [job for job in jobs if job['type'] == 'gs']
+ buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot']
+
+ if ((not gs_jobs and not buildbot_jobs) or
+ (gs_jobs and len(argv) < 2)):
+ return _print_usage(argv)
+ gsutil_path = argv[1] if gs_jobs else ''
- print "No jobs to check."
- return 0
+ while time.time() < start_time + timeout_interval:
+ # Checking GS jobs
+ completed_jobs = []
+ for job in gs_jobs:
+ if _gs_file_exists(gsutil_path, job['location']):
+ completed_jobs.append(job)
+ if completed_jobs:
+ print json.dumps({'completed': completed_jobs})
+ return COMPLETED
+ # Checking Buildbot jobs
+ if _next_buildbot_check_due():
+ buildbot_results = _check_buildbot_jobs(buildbot_jobs)
+ if buildbot_results:
+ print json.dumps(buildbot_results)
+ if 'completed' in buildbot_results and buildbot_results['completed']:
+ return COMPLETED
+ return FAILED
prasadv 2015/09/17 22:33:35 How are we handling pending jobs, I mean jobs in q
RobertoCN 2015/09/19 00:32:54 They are considered in progress, as they are neith
+
+ sys.stdout.write('.')
+ sys.stdout.flush()
+ time.sleep(SLEEP_INTERVAL)
+ return TIMED_OUT
if __name__ == '__main__':
sys.exit(main(sys.argv))

Powered by Google App Engine
This is Rietveld 408576698