1 # Copyright (c) 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """Collect stats regularly via app engine cron. | |
6 """ | |
7 | |
8 import calendar | |
9 import datetime | |
10 import json | |
11 import logging | |
12 | |
13 import numpy | |
14 import webapp2 | |
15 | |
16 from google.appengine.api import urlfetch | |
17 from google.appengine.ext import ndb | |
18 | |
19 from appengine_module.trooper_o_matic import models | |
20 from appengine_module.trooper_o_matic import trees | |
21 | |
22 | |
def datetime_now():  # pragma: no cover
  """Return the current UTC time.

  Exists as a module-level wrapper so unit tests can mock "now" without
  patching datetime.datetime itself.
  """
  utc_now = datetime.datetime.utcnow()
  return utc_now
26 | |
27 | |
def date_from_str(string, base_format):  # pragma: no cover
  """Parses a datetime string that may or may not carry microseconds.

  First attempts base_format extended with a trailing '.%f' microsecond
  field, then falls back to the bare base_format if that fails.
  """
  fractional_format = base_format + '.%f'
  try:
    return datetime.datetime.strptime(string, fractional_format)
  except ValueError:
    return datetime.datetime.strptime(string, base_format)
35 | |
36 | |
class CheckCQHandler(webapp2.RequestHandler):  # pragma: no cover
  """Collect commit queue length and run times.

  Polls the chromium-cq-status and commit-queue apps and records, per
  project, the current CQ queue length plus percentile stats for
  single-attempt run times, cumulative time in the queue, and wall-clock
  time from first start to last stop of each patchset.
  """

  # Lists patch_stop events since a unix timestamp; may page via a cursor.
  patch_stop_list = ('http://chromium-cq-status.appspot.com/query/action='
                     'patch_stop/?begin=%d')

  # Lists the issues currently pending in the CQ for a project.
  pending_api_url = 'https://chromium-commit-queue.appspot.com/api/%s/pending'

  # Lists all recorded CQ events for a single (issue, patchset) pair.
  patchset_details = ('https://chromium-cq-status.appspot.com/query/'
                      'issue=%d/patchset=%d/')

  @staticmethod
  def update_stat_for_times(stat, times):
    """Fills min/max/mean and percentile fields of stat.

    Args:
      stat: a models stat entity with min/max/mean/pNN properties.
      times: a non-empty, sorted list of durations (minutes).
    """
    stat.min = times[0]
    stat.max = times[-1]
    stat.mean = numpy.mean(times)
    stat.p10 = numpy.percentile(times, 10)
    stat.p25 = numpy.percentile(times, 25)
    stat.p50 = numpy.percentile(times, 50)
    stat.p75 = numpy.percentile(times, 75)
    stat.p90 = numpy.percentile(times, 90)
    stat.p95 = numpy.percentile(times, 95)
    stat.p99 = numpy.percentile(times, 99)

  def get(self):
    # We only care about the last hour.
    cutoff = datetime_now() - datetime.timedelta(hours=1)
    base_url = self.patch_stop_list % calendar.timegm(cutoff.timetuple())

    # CQ API has a limit of results it will return, and if there are more
    # results it will return a cursor. So loop through results until
    # there is no cursor. Build each request URL from the base URL so
    # stale cursors from earlier pages don't accumulate on the query
    # string (the old code appended every cursor to the previous URL).
    cursor = None
    more_results = True
    patchsets = {}
    while more_results:
      url = base_url
      if cursor:
        url += '&cursor=' + cursor
      result = urlfetch.fetch(url=url, deadline=60)
      content = json.loads(result.content)
      for entry in content['results']:
        fields = entry['fields']
        patchsets.setdefault(fields['project'], set()).add(
            (fields['issue'], fields['patchset']))
      cursor = content.get('cursor')
      more_results = content.get('more')

    # Only track the chromium and blink projects.
    projects = set(['chromium', 'blink'])
    for project in projects:
      # Ensure there is an ancestor for all the stats for this project.
      project_model = models.Project.get_or_insert(project)
      project_model.put()

      # CQ exposes an API for its length.
      result = urlfetch.fetch(url=self.pending_api_url % project, deadline=60)
      pending = set(json.loads(result.content)['results'])
      num_pending = len(pending)
      stat = models.CqStat(parent=project_model.key, length=num_pending)
      patch_in_queue_stat = models.CqTimeInQueueForPatchStat(
          parent=project_model.key, length=num_pending)
      patch_total_time_stat = models.CqTotalTimeForPatchStat(
          parent=project_model.key, length=num_pending)

      single_run_times = []
      in_queue_times = []
      total_times = []

      # A project with no patch_stop events in the window has no entry in
      # patchsets; use an empty set rather than raising KeyError.
      for issue, patchset in patchsets.get(project, set()):
        url = self.patchset_details % (issue, patchset)
        result = urlfetch.fetch(url=url, deadline=60)
        content = json.loads(result.content)
        # Get a list of all starts/stops for this patch, oldest first.
        actions = [entry['fields'] for entry in content['results']
                   if entry['fields'].get('action') in ('patch_start',
                                                        'patch_stop')]
        actions.sort(key=lambda k: k['timestamp'])

        start_time = None   # first patch_start seen for this patchset
        last_start = None   # most recent unmatched patch_start
        end_time = None     # most recent patch_stop
        run_times = []
        for action in actions:
          if action['action'] == 'patch_start':
            if not start_time:
              start_time = action['timestamp']
            last_start = action['timestamp']
          else:
            if last_start:
              # Timestamps are in seconds; record run times in minutes.
              run_times.append((action['timestamp'] - last_start) / 60)
              last_start = None
            end_time = action['timestamp']

        if run_times:
          single_run_times += run_times
          in_queue_times.append(sum(run_times))
          total_times.append((end_time - start_time) / 60)

      if single_run_times:
        self.update_stat_for_times(stat, sorted(single_run_times))
        self.update_stat_for_times(patch_in_queue_stat, sorted(in_queue_times))
        self.update_stat_for_times(patch_total_time_stat, sorted(total_times))

      stat.put()
      patch_in_queue_stat.put()
      patch_total_time_stat.put()
144 | |
145 | |
class CheckTreeHandler(webapp2.RequestHandler):  # pragma: no cover
  """Checks the given tree for build times higher than the SLO specifies."""

  # %s placeholders: master name, hour timestamp string.
  stats_api_url = ('https://chrome-infra-stats.appspot.com/_ah/api/stats/v1/'
                   'steps/%s/overall__build__result__/%s')

  last_hour_format = '%Y-%m-%dT%H:%MZ'
  generated_format = '%Y-%m-%dT%H:%M:%S'

  def get(self, tree):
    """For each master in the tree, find builds that don't meet our SLO."""
    masters = trees.GetMastersForTree(tree)
    if not masters:
      logging.error('Invalid tree %s', tree)
      return
    now = datetime_now()
    tree_model = models.Tree.get_or_insert(tree)
    tree_model.put()
    stat = models.BuildTimeStat(parent=tree_model.key,
                                timestamp=now,
                                num_builds=0,
                                num_over_median_slo=0,
                                num_over_max_slo=0)
    # The chrome-infra-stats API lists builds that have STARTED in the last
    # hour. We want to list builds that have ENDED in the last hour, so we
    # need to go back through the last 24 hours to make sure we don't miss
    # any; builds generated earlier than an hour ago are filtered below.
    # TODO(sullivan): When an "ended in last hour" API is available, switch
    # to that.
    hour_strs = [(now - datetime.timedelta(hours=h)).strftime(
        self.last_hour_format) for h in range(24)]
    one_hour = datetime.timedelta(hours=1)
    for master in masters:
      records = []
      for hour_str in hour_strs:
        url = self.stats_api_url % (master, hour_str)
        logging.info(url)
        result = urlfetch.fetch(url=url, deadline=60)
        records += json.loads(result.content).get('step_records', [])
      for record in records:
        generated_time = date_from_str(record['generated'],
                                       self.generated_format)
        # Only count builds that actually finished within the last hour.
        if now - generated_time > one_hour:
          continue
        stat.num_builds += 1
        builder = record['builder']
        median_slo = models.SLO_BUILDTIME_PER_BOT_MEDIAN.get(
            master, {}).get(builder, models.SLO_BUILDTIME_MEDIAN)
        max_slo = models.SLO_BUILDTIME_PER_BOT_MAX.get(
            master, {}).get(builder, models.SLO_BUILDTIME_MAX)
        # Keep max >= median so the nested check below stays consistent.
        max_slo = max(max_slo, median_slo)

        if record['step_time'] > median_slo:
          stat.num_over_median_slo += 1
          offender = models.BuildSLOOffender(
              tree=tree, master=master,
              builder=builder,
              buildnumber=int(record['buildnumber']),
              buildtime=float(record['step_time']),
              result=int(record['result']),
              revision=record['revision'],
              slo_median_buildtime=median_slo,
              slo_max_buildtime=max_slo)
          stat.slo_offenders.append(offender)
          # Anything over max is necessarily also over median (clamped
          # above), so this nested check catches every max violation.
          if record['step_time'] > max_slo:
            stat.num_over_max_slo += 1
    ndb.put_multi(stat.slo_offenders)
    stat.put()
212 | |
213 | |
class CheckTreeStatusHandler(webapp2.RequestHandler):  # pragma: no cover
  """Records what percentage of a window the project's tree was open."""

  # %s placeholders: project name, unix timestamp upper bound for history.
  status_url = ('https://%s-status.appspot.com/allstatus?format=json&'
                'endTime=%s&limit=1000')

  @staticmethod
  def tree_is_open_for(entry):
    """Returns whether a status entry represents an open tree."""
    # Count scheduled maintenance as tree open, we only want to alert on
    # unexpected closures.
    return (entry['can_commit_freely'] or
            entry['message'].startswith('Tree is closed for maintenance'))

  @staticmethod
  def date_for(entry):
    """Parses the entry's timestamp, with or without microseconds."""
    # Use the shared helper so timestamps that happen to omit the
    # fractional-second part don't raise ValueError (the old hard-coded
    # '.%f' format required microseconds to always be present).
    return date_from_str(entry['date'], '%Y-%m-%d %H:%M:%S')

  def fetch_entries(self, project, days):
    """Fetches status entries covering the last `days` days, oldest first."""
    # Pad the query with two previous days of data, in case the tree has
    # been in the same state for the entire time period — we need at least
    # one entry from before the cutoff to determine the initial state.
    data_start = datetime_now() - datetime.timedelta(days=days + 2)
    url = self.status_url % (project, calendar.timegm(data_start.timetuple()))
    result = urlfetch.fetch(url)
    entries = json.loads(result.content)
    entries.sort(key=self.date_for)
    return entries

  def get_state_of_tree(self, entries, cutoff):
    """Returns whether the tree was open at the cutoff time."""
    # Walk the (oldest-first) entries up to the cutoff; the last one before
    # the cutoff wins. Default to open if no earlier entry exists.
    was_open = True
    for entry in entries:
      if self.date_for(entry) > cutoff:
        break
      was_open = self.tree_is_open_for(entry)
    return was_open

  def get(self, project, days):
    """Stores a TreeOpenStat for the tree's open time in the last N days."""
    days = int(days)
    now = datetime_now()
    cutoff = now - datetime.timedelta(days=days)

    entries = self.fetch_entries(project, days)
    was_open = self.get_state_of_tree(entries, cutoff)

    # Now look through the entries in the relevant days to find the tree
    # open times. Entries that don't flip the open/closed state are skipped.
    last_change = cutoff
    open_time = datetime.timedelta(seconds=0)
    closed_time = datetime.timedelta(seconds=0)
    for entry in entries:
      is_open = self.tree_is_open_for(entry)
      if self.date_for(entry) <= cutoff or is_open == was_open:
        continue
      current_time = self.date_for(entry)
      delta = current_time - last_change
      if was_open:
        open_time += delta
      else:
        closed_time += delta
      last_change = current_time
      was_open = is_open

    # Account for the span between the final state change and now.
    delta = now - last_change
    if was_open:
      open_time += delta
    else:
      closed_time += delta

    open_seconds = open_time.total_seconds()
    closed_seconds = closed_time.total_seconds()
    total_seconds = open_seconds + closed_seconds
    # Guard against days=0, where the window (and the total) is empty;
    # report an empty window as 100% open rather than dividing by zero.
    percent_open = (
        (open_seconds / total_seconds) * 100 if total_seconds else 100.0)
    project_model = models.Project.get_or_insert(project)
    project_model.put()
    stat = models.TreeOpenStat(
        parent=project_model.key,
        num_days=days,
        percent_open=percent_open)
    stat.put()