Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(284)

Side by Side Diff: appengine_apps/trooper_o_matic/appengine_module/trooper_o_matic/cron.py

Issue 774323002: Moved trooper_o_matic to appengine/ (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Collect stats regularly via app engine cron.
6 """
7
8 import calendar
9 import datetime
10 import json
11 import logging
12
13 import numpy
14 import webapp2
15
16 from google.appengine.api import urlfetch
17 from google.appengine.ext import ndb
18
19 from appengine_module.trooper_o_matic import models
20 from appengine_module.trooper_o_matic import trees
21
22
def datetime_now():  # pragma: no cover
  """Returns datetime.datetime.utcnow(); a single seam for tests to mock."""
  utc_now = datetime.datetime.utcnow()
  return utc_now
26
27
def date_from_str(string, base_format):  # pragma: no cover
  """Parses |string| into a datetime, with or without fractional seconds.

  The format with a trailing '.%f' is tried first; if that raises ValueError
  the plain |base_format| is used instead. A ValueError from the second
  attempt propagates to the caller.
  """
  fractional_format = base_format + '.%f'
  try:
    parsed = datetime.datetime.strptime(string, fractional_format)
  except ValueError:
    parsed = datetime.datetime.strptime(string, base_format)
  return parsed
35
36
class CheckCQHandler(webapp2.RequestHandler):  # pragma: no cover
  """Collect commit queue length and run times."""

  patch_stop_list = ('http://chromium-cq-status.appspot.com/query/action='
                     'patch_stop/?begin=%d')

  pending_api_url = 'https://chromium-commit-queue.appspot.com/api/%s/pending'

  patchset_details = ('https://chromium-cq-status.appspot.com/query/'
                      'issue=%d/patchset=%d/')

  @staticmethod
  def update_stat_for_times(stat, times):
    """Fills the summary attributes of |stat| from |times|.

    Args:
      stat: object whose min, max, mean and pNN attributes are assigned.
      times: sequence of numbers sorted in ascending order; its first and
          last elements are stored as min and max. An empty sequence leaves
          |stat| untouched.
    """
    if len(times) == 0:
      return
    stat.min = times[0]
    stat.max = times[-1]
    stat.mean = numpy.mean(times)
    stat.p10 = numpy.percentile(times, 10)
    stat.p25 = numpy.percentile(times, 25)
    stat.p50 = numpy.percentile(times, 50)
    stat.p75 = numpy.percentile(times, 75)
    stat.p90 = numpy.percentile(times, 90)
    stat.p95 = numpy.percentile(times, 95)
    stat.p99 = numpy.percentile(times, 99)

  @staticmethod
  def _fetch_json(url):
    """Fetches |url| with a 60 second deadline and decodes the JSON body."""
    return json.loads(urlfetch.fetch(url=url, deadline=60).content)

  def _fetch_stopped_patchsets(self, cutoff):
    """Returns {project: set((issue, patchset))} for patch_stop events.

    Args:
      cutoff: datetime passed as the 'begin' bound of the query.
    """
    base_url = self.patch_stop_list % calendar.timegm(cutoff.timetuple())

    # CQ API has a limit of results it will return, and if there are more
    # results it will return a cursor. So loop through results until
    # there is no cursor.
    patchsets = {}
    cursor = None
    while True:
      # Always start from the base URL: appending to the previous page's URL
      # would accumulate one stale '&cursor=' parameter per page.
      url = base_url
      if cursor:
        url += '&cursor=' + cursor
      content = self._fetch_json(url)
      for entry in content['results']:
        fields = entry['fields']
        patchsets.setdefault(fields['project'], set()).add(
            (fields['issue'], fields['patchset']))
      cursor = content.get('cursor')
      # Stop when there are no more pages, or when 'more' is set without a
      # cursor (which would otherwise refetch the same page forever).
      if not (content.get('more') and cursor):
        break
    return patchsets

  @staticmethod
  def _times_for_actions(actions):
    """Computes run times for one patchset from its start/stop actions.

    Args:
      actions: list of dicts with 'action' ('patch_start' or 'patch_stop')
          and a numeric 'timestamp'; timestamps are divided by 60 to get
          minutes, so they are presumably in seconds.

    Returns:
      (run_times, total_time). run_times holds one duration per start/stop
      pair, measured from the most recent patch_start. total_time is the
      span from the first patch_start to the last patch_stop, or None when
      run_times is empty.
    """
    start_time = None
    last_start = None
    end_time = None
    run_times = []
    for action in sorted(actions, key=lambda a: a['timestamp']):
      timestamp = action['timestamp']
      if action['action'] == 'patch_start':
        # Compare against None so a timestamp of 0 is not mistaken for unset.
        if start_time is None:
          start_time = timestamp
        last_start = timestamp
      else:
        if last_start is not None:
          # 60.0 avoids integer truncation under Python 2.
          run_times.append((timestamp - last_start) / 60.0)
          last_start = None
        # NOTE(review): indentation was lost in the reviewed copy; end_time is
        # assumed to be recorded for every patch_stop -- confirm.
        end_time = timestamp

    if not run_times:
      return run_times, None
    return run_times, (end_time - start_time) / 60.0

  def get(self):
    """Stores CQ length and patch timing stats for each tracked project."""
    # We only care about the last hour.
    cutoff = datetime_now() - datetime.timedelta(hours=1)
    patchsets = self._fetch_stopped_patchsets(cutoff)

    # Only track the chromium and blink projects.
    for project in ('chromium', 'blink'):
      # Ensure there is an ancestor for all the stats for this project.
      project_model = models.Project.get_or_insert(project)
      project_model.put()

      # CQ exposes an API for its length.
      pending = set(
          self._fetch_json(self.pending_api_url % project)['results'])
      num_pending = len(pending)
      stat = models.CqStat(parent=project_model.key, length=num_pending)
      patch_in_queue_stat = models.CqTimeInQueueForPatchStat(
          parent=project_model.key, length=num_pending)
      patch_total_time_stat = models.CqTotalTimeForPatchStat(
          parent=project_model.key, length=num_pending)

      single_run_times = []
      in_queue_times = []
      total_times = []

      # A project with no patch_stop events in the window has no entry in
      # |patchsets|; treat that as "no patchsets" instead of raising KeyError.
      for issue, patchset in patchsets.get(project, ()):
        content = self._fetch_json(self.patchset_details % (issue, patchset))
        # Get a list of all starts/stops for this patch.
        actions = [
            entry['fields'] for entry in content['results']
            if entry['fields'].get('action') in ('patch_start', 'patch_stop')]
        run_times, total_time = self._times_for_actions(actions)
        if run_times:
          single_run_times += run_times
          in_queue_times.append(sum(run_times))
          total_times.append(total_time)

      if single_run_times:
        self.update_stat_for_times(stat, sorted(single_run_times))
        self.update_stat_for_times(patch_in_queue_stat, sorted(in_queue_times))
        self.update_stat_for_times(patch_total_time_stat, sorted(total_times))

      # The stats are stored even without timing data so the queue length is
      # still recorded.
      stat.put()
      patch_in_queue_stat.put()
      patch_total_time_stat.put()
144
145
class CheckTreeHandler(webapp2.RequestHandler):  # pragma: no cover
  """Looks for builds on a tree whose build time exceeds the SLO."""

  stats_api_url = ('https://chrome-infra-stats.appspot.com/_ah/api/stats/v1/'
                   'steps/%s/overall__build__result__/%s')

  last_hour_format = '%Y-%m-%dT%H:%MZ'
  generated_format = '%Y-%m-%dT%H:%M:%S'

  def get(self, tree):
    """Counts builds over the SLO on every master of |tree| and stores them.

    Logs an error and returns without storing anything when
    trees.GetMastersForTree() yields no masters for |tree|.
    """
    masters = trees.GetMastersForTree(tree)
    if not masters:
      logging.error('Invalid tree %s', tree)
      return

    now = datetime_now()
    tree_model = models.Tree.get_or_insert(tree)
    tree_model.put()
    stat = models.BuildTimeStat(
        parent=tree_model.key,
        timestamp=now,
        num_builds=0,
        num_over_median_slo=0,
        num_over_max_slo=0)

    # The chrome-infra-stats API lists builds that have STARTED in the last
    # hour. We want to list builds that have ENDED in the last hour, so we need
    # to go back through the last 24 hours to make sure we don't miss any.
    # TODO(sullivan): When an "ended in last hour" API is available, switch
    # to that.
    hour_strs = []
    for hours_back in range(0, 24):
      hour = now - datetime.timedelta(hours=hours_back)
      hour_strs.append(hour.strftime(self.last_hour_format))
    one_hour = datetime.timedelta(hours=1)

    for master in masters:
      # Gather the step records of all 24 hourly queries for this master.
      records = []
      for hour_str in hour_strs:
        url = self.stats_api_url % (master, hour_str)
        logging.info(url)
        response = urlfetch.fetch(url=url, deadline=60)
        payload = json.loads(response.content)
        records += payload.get('step_records', [])

      for record in records:
        generated_time = date_from_str(
            record['generated'], self.generated_format)
        # Skip records generated more than an hour ago.
        if now - generated_time > one_hour:
          continue
        stat.num_builds += 1

        # Per-builder SLO overrides win over the global defaults.
        median_overrides = models.SLO_BUILDTIME_PER_BOT_MEDIAN.get(master, {})
        buildtime_median = median_overrides.get(
            record['builder'], models.SLO_BUILDTIME_MEDIAN)
        max_overrides = models.SLO_BUILDTIME_PER_BOT_MAX.get(master, {})
        buildtime_max = max_overrides.get(
            record['builder'], models.SLO_BUILDTIME_MAX)
        # The max SLO is never allowed to be below the median SLO.
        buildtime_max = max(buildtime_max, buildtime_median)

        if record['step_time'] > buildtime_median:
          stat.num_over_median_slo += 1
          offender = models.BuildSLOOffender(
              tree=tree,
              master=master,
              builder=record['builder'],
              buildnumber=int(record['buildnumber']),
              buildtime=float(record['step_time']),
              result=int(record['result']),
              revision=record['revision'],
              slo_median_buildtime=buildtime_median,
              slo_max_buildtime=buildtime_max)
          stat.slo_offenders.append(offender)
          if record['step_time'] > buildtime_max:
            stat.num_over_max_slo += 1

    ndb.put_multi(stat.slo_offenders)
    stat.put()
212
213
class CheckTreeStatusHandler(webapp2.RequestHandler):  # pragma: no cover
  """Records the percentage of time a project's tree was open."""

  status_url = ('https://%s-status.appspot.com/allstatus?format=json&'
                'endTime=%s&limit=1000')

  @staticmethod
  def tree_is_open_for(entry):
    """Returns True if the status |entry| counts as an open tree."""
    # Count scheduled maintenance as tree open, we only want to alert on
    # unexpected closures.
    return bool(
        entry['can_commit_freely'] or
        entry['message'].startswith('Tree is closed for maintenance'))

  @staticmethod
  def date_for(entry):
    """Returns the datetime parsed from entry['date'].

    Uses date_from_str so that a timestamp without a fractional-second part
    parses as well as one with it.
    """
    return date_from_str(entry['date'], '%Y-%m-%d %H:%M:%S')

  def fetch_entries(self, project, days):
    """Fetches the status entries of |project|, sorted by ascending date.

    Args:
      project: name substituted into the status app host name.
      days: number of days the caller wants to examine.
    """
    # Get two previous days of data, in case the tree has been in the same
    # state for the entire time period.
    data_start = datetime_now() - datetime.timedelta(days=days+2)
    url = self.status_url % (project, calendar.timegm(data_start.timetuple()))
    # Same deadline as the other fetches in this module.
    result = urlfetch.fetch(url=url, deadline=60)
    entries = json.loads(result.content)
    entries.sort(key=self.date_for)
    return entries

  def get_state_of_tree(self, entries, cutoff):
    """Returns whether the tree was open at |cutoff|.

    |entries| must be sorted by ascending date. The state of the last entry
    dated at or before |cutoff| is returned; True if there is none.
    """
    # Find the state of the tree before the days started.
    was_open = True
    for entry in entries:
      if self.date_for(entry) > cutoff:
        break
      was_open = self.tree_is_open_for(entry)
    return was_open

  def get(self, project, days):
    """Stores a TreeOpenStat for |project| covering the last |days| days.

    Args:
      project: project name, used for the status URL and as the stat parent.
      days: number of days to look back; converted with int().
    """
    # Check tree status in last N days
    days = int(days)
    now = datetime_now()
    # Derive the cutoff from the same instant as |now| so the window is
    # exactly |days| long.
    cutoff = now - datetime.timedelta(days=days)

    entries = self.fetch_entries(project, days)
    was_open = self.get_state_of_tree(entries, cutoff)

    # Now look through the entries in the relevant days to find the tree open
    # times.
    last_change = cutoff
    open_time = datetime.timedelta(seconds=0)
    closed_time = datetime.timedelta(seconds=0)
    for entry in entries:
      current_time = self.date_for(entry)
      is_open = self.tree_is_open_for(entry)
      # Only entries inside the window that flip the state matter.
      if current_time <= cutoff or is_open == was_open:
        continue
      delta = current_time - last_change
      if was_open:
        open_time += delta
      else:
        closed_time += delta
      last_change = current_time
      was_open = is_open

    # Account for the time between the last state change and now.
    delta = now - last_change
    if was_open:
      open_time += delta
    else:
      closed_time += delta

    open_seconds = open_time.total_seconds()
    closed_seconds = closed_time.total_seconds()
    total_seconds = open_seconds + closed_seconds
    if total_seconds > 0:
      percent_open = (open_seconds / total_seconds) * 100
    else:
      # Empty window (e.g. days == 0): report the current state instead of
      # dividing by zero.
      percent_open = 100.0 if was_open else 0.0

    project_model = models.Project.get_or_insert(project)
    project_model.put()
    stat = models.TreeOpenStat(
        parent=project_model.key,
        num_days=days,
        percent_open=percent_open)
    stat.put()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698