Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(444)

Side by Side Diff: scripts/slave/recipe_modules/auto_bisect/resources/wait_for_any.py

Issue 1339613005: Refactoring scripts that wait for buildbot jobs to complete. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@hax
Patch Set: removing blank line Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 #
3 # Copyright 2015 The Chromium Authors. All rights reserved. 2 # Copyright 2015 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file. 4 # found in the LICENSE file.
6 5
7 """Waits for any one job out of a list to complete or a default timeout.""" 6 """Waits for any one job out of a list to complete or a default timeout."""
8 7
9 import json 8 import json
9 import os
10 import subprocess 10 import subprocess
11 import sys 11 import sys
12 import time 12 import time
13 import urllib2
14 13
15 import check_buildbot 14 import check_buildbot
16 15
# Return codes.
COMPLETED, FAILED, TIMED_OUT, BAD_ARGS = 0, 1, 2, 3

# The following intervals are specified in seconds, are expected to be sent as
# arguments to time.sleep()

# If none of the URLs is determined to be ready, we sleep for a 'long'
# interval.
SLEEP_INTERVAL = 60
# We should check buildbot not more often than every 10 minutes.
BUILDBOT_CHECK_INTERVAL = 600

# Earliest time (epoch seconds) at which the next buildbot poll is allowed;
# read and advanced by _next_buildbot_check_due().
next_buildbot_check_due_time = 0
34 29
35 30
36 def _run_gsutil(cmd): 31 def _print_usage(argv):
37 # Sleep for a short time between gsutil calls 32 usage = 'Usage: %s <gsutil path> [--timeout=<seconds>]'
38 time.sleep(SHORT_INTERVAL) 33 print usage % argv[0]
39 cmd = [gsutil_path] + cmd 34 print 'main.__doc__'
40 try: 35 print main.__doc__
41 out = subprocess.check_output(cmd) 36 return BAD_ARGS
42 return 0, out 37
43 except subprocess.CalledProcessError as cpe: 38
44 return cpe.returncode, cpe.output 39 def _gs_file_exists(gsutil_path, url):
45
46
47 def _gs_file_exists(url):
48 """Checks that running 'gsutil ls' returns 0 to see if file at url exists.""" 40 """Checks that running 'gsutil ls' returns 0 to see if file at url exists."""
49 return _run_gsutil(['ls', url])[0] == 0 41 cmd = [gsutil_path, 'ls', url]
42 error = subprocess.call(cmd, stdout=open(os.devnull, 'wb'))
43 return not error
50 44
51 45
def _next_buildbot_check_due():
  """To limit how often we pull the [potentially big] json object from bb.

  Returns:
    True when at least BUILDBOT_CHECK_INTERVAL seconds have passed since the
    last due check (and schedules the next one as a side effect), False
    otherwise.
  """
  global next_buildbot_check_due_time
  # Sample the clock once so the comparison and the rescheduling below use
  # the same instant (the original called time.time() twice).
  now = time.time()
  if now > next_buildbot_check_due_time:
    next_buildbot_check_due_time = now + BUILDBOT_CHECK_INTERVAL
    sys.stderr.write('Checking buildbot for completed/failed jobs')
    return True
  return False
59 55
60 def _check_failed_buildbot_jobs(locations): 56 def _check_buildbot_jobs(jobs_to_check):
61 if not locations: 57 if not jobs_to_check:
62 return None 58 return None
63 jobs = {} 59 jobs = {}
64 for loc in locations: 60 completed_results = []
65 _, master, builder, job_name = loc.split(':', 3) 61 failed_results = []
62 # Mapping from job names to the original dictionary sent in jobs_to_check
63 entries = {}
64 job_urls = {}
65 for entry in jobs_to_check:
66 master = entry['master']
67 builder = entry['builder']
68 job_name = entry['job_name']
69 # The entries in this list may have multiple jobs for a single builder, and
70 # we want to avoid hitting the builder for each job, since we get the
71 # information for all builds each time.
72 #
73 # To prevent this we are taking this:
74 # [{'master': 'M', 'builder': 'B', 'job_name': 'J1'},
75 # {'master': 'M', 'builder': 'B', 'job_name': 'J2'},
76 # {'master': 'M', 'builder': 'C', 'job_name': 'J3'},
77 # ]
78 # And building this in the jobs variable:
79 # {'M': { 'B': ['J1', 'J2'], 'C': ['J3']}}
66 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name) 80 jobs.setdefault(master, {}).setdefault(builder, []).append(job_name)
81 entries[job_name] = entry
67 for master in jobs.keys(): 82 for master in jobs.keys():
68 for builder in jobs[master].keys(): 83 for builder in jobs[master].keys():
69 if check_buildbot.main(["check_buildbot", master, builder] 84 config = {
70 + jobs[master][builder]): 85 'master': master,
71 return 1 86 'builder': builder,
72 return 0 87 'job_names': jobs[master][builder],
88 }
89 builder_results = check_buildbot.main(config)
90 completed_results += builder_results.get('completed', [])
91 failed_results += builder_results.get('failed', [])
92 job_urls.update(builder_results.get('job_urls', {}))
93 results = {}
94 if completed_results:
95 results['completed'] = [entries[k] for k in completed_results]
96 if failed_results:
97 results['failed'] = [entries[k] for k in failed_results]
98 for job in results.get('failed', []) + results.get('completed', []):
99 if job['job_name'] in job_urls:
100 job['job_url'] = job_urls[job['job_name']]
101
102 return results
73 103
74 104
75 def main(argv): 105 def main(argv):
76 global timeout_interval 106 """Main function of the script.
107
108 The script expects the path to gsutil to be provided on the command line, and
109 a json object containing the details of the jobs to monitor on standard input.
110
111 Each job in the list, should be one of the following types:
112 - GS location, which must at least contain:
113 - The "type" key set to the "gs" value.
114 - The "location" key, containing the location ("gs://...") of the gs
115 object to check.
116 - Buildbot job, which must at least contain:
117 - The "type" key set to the "buildbot" value.
118 - The "master" key containing the name of the appropriate master, e.g.
119 "tryserver.chromium.perf".
120 - The "builder" key set to the name of the builder performing the job.
121 - The "job_name" key containing the name of the job to check. i.e.
122 typically a uuid or a hash will be used.
123
124 The script will wait until the first of the following conditions becomes true:
125 - An object exists at one of the GS locations
126 - One of the buildbot jobs completes as succesful
127 - One of the buildbot jobs fails
128 - One week elapses from the invocation of the script. (The exact timeout may
129 be overriden from the command line)
130
131 The return code will be:
132 0 if a buildbot job succeeds or an object exists at the GS locations.
133 1 if a buildbot job fails
134 2 if the one-week timeout is triggered.
135
136 Additionally, a json object will be written to standard output containig the
137 results of the script.
138
139 Example of expected stdin:
140 {
141 "jobs": [
142 {
143 "type": "gs",
144 "location": "gs://chrome-perf/some_path/some_object.json"
145 },
146 {
147 "type": "buildbot",
148 "master": "tryserver.chromium.perf",
149 "builder": "linux_perf_bisect",
150 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
151 }
152 ]
153 }
154 EOF
155
156 Examples of results from stdout:
157 cat <<EOF #Successful result
158 {
159 "completed": [
160 {
161 "type": "buildbot",
162 "master": "tryserver.chromium.perf",
163 "builder": "linux_perf_bisect",
164 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
165 }
166 ]
167 }
168 EOF
169
170 cat <<EOF #Unsuccessful result
171 {
172 "failed": [
173 {
174 "type": "buildbot",
175 "master": "tryserver.chromium.perf",
176 "builder": "linux_perf_bisect",
177 "job_name": "f74fb8e0418d47bfb7d01fad0dd4df06"
178 }
179 ]
180 }
181 EOF
182 """
183 start_time = time.time()
184 # Default timeout: six days
185 timeout_interval = 6 * 24 * 60 * 60
77 if argv[-1].startswith('--timeout='): 186 if argv[-1].startswith('--timeout='):
78 timeout_interval = int(argv[-1].split('=')[1]) 187 timeout_interval = int(argv[-1].split('=')[1])
79 argv = argv[:-1] 188 argv = argv[:-1]
80 189
81 if len(argv) < 3: 190 jobs = json.loads(sys.stdin.read())['jobs']
82 usage = ('Usage: %s <gsutil path> url1 [url2 [url3...]]' 191 gs_jobs = [job for job in jobs if job['type'] == 'gs']
83 ' [--timeout=<seconds>]\n' 192 buildbot_jobs = [job for job in jobs if job['type'] == 'buildbot']
84 ' Where urls are either a google storage location for the result ' 193
85 ' file, or a buildbot location of the form ' 194 if ((not gs_jobs and not buildbot_jobs) or
86 '"bb:<master>:<builderi>:<job_name>".') 195 (gs_jobs and len(argv) < 2)):
87 print usage % argv[0] 196 return _print_usage(argv)
88 return 1 197
89 198 gsutil_path = argv[1] if gs_jobs else ''
90 list_of_urls = ', '.join(['<%s>' % url for url in argv[2:]]) 199
91 print 'Waiting for the following urls: ' + list_of_urls 200 while time.time() < start_time + timeout_interval:
92 global gsutil_path 201 # Checking GS jobs
93 start_time = time.time() 202 completed_jobs = []
94 gsutil_path = argv[1] 203 for job in gs_jobs:
95 urls = argv[2:] 204 if _gs_file_exists(gsutil_path, job['location']):
96 while urls: 205 completed_jobs.append(job)
97 for url in urls: 206
98 if url.startswith('bb:'): 207 # Checking Buildbot jobs
99 pass 208 if completed_jobs or _next_buildbot_check_due():
100 elif _gs_file_exists(url): 209 # buildbot_results will only contain jobs that have been completed or
101 print 'Build finished: ', url 210 # failed. All other jobs (scheduled, in progress, etc.) will be ignored.
102 return 0 211 buildbot_results = _check_buildbot_jobs(buildbot_jobs)
103 if time.time() - start_time > timeout_interval: 212 if buildbot_results:
104 print "Timed out waiting for: ", urls 213 print json.dumps(buildbot_results)
105 return 1 214 if 'completed' in buildbot_results and buildbot_results['completed']:
106 if _next_buildbot_check_due(): 215 return COMPLETED
107 failed_job = _check_failed_buildbot_jobs( 216 return FAILED
108 [url for url in urls if url.startswith('bb:')]) 217
109 if failed_job: 218 if completed_jobs:
110 return 0 219 # This clause is just a fallback. Ideally when a results file shows up at
111 time.sleep(LONG_INTERVAL) 220 # a gs location, we'd want to run check_buildbot jobs first to find the
112 221 # url to the job detaisl.
113 222 print json.dumps({'completed': completed_jobs})
114 print "No jobs to check." 223 return COMPLETED
115 return 0 224 # At this point, no jobs were completed nor failed. We print a char to
116 225 # prevent buildbot from killing this process for inactivity.
226 sys.stderr.write('Sleeping.\n')
227 sys.stderr.flush()
228 time.sleep(SLEEP_INTERVAL)
229 return TIMED_OUT
117 230
# Script entry point: exit with main's return code (COMPLETED, FAILED,
# TIMED_OUT or BAD_ARGS).
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698