OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import json | |
6 | |
7 | |
8 # functions that return stats calculated as lists of lists by month | |
9 def ratio_calculator(numerator, denominator): | |
10 """Computes the ratio of the counts in two lists | |
11 | |
12 Args: | |
13 numerator(list): list of lists with a year-month string as the first index | |
14 and a count as the second index | |
15 denominator(list): list of lists with a year-month string as the first index | |
16 and a count as the second index | |
17 | |
18 Return: | |
19 ratios(list): a list of lists with ratios rounded to three decimal places | |
20 e.g. [['2015-01', .251], ['2014-10', .742]] | |
21 """ | |
22 ratios = [] | |
23 for i in numerator: | |
24 for j in denominator: | |
25 if i[0] == j[0] and j[1] != 0: | |
26 ratios.append([i[0], round(float(i[1]) / j[1], 3)]) | |
27 break | |
28 elif i[0] == j[0]: | |
29 ratios.append([i[0], 0]) | |
30 return ratios | |
31 | |
32 | |
33 def total_commits(cc): | |
34 """Counts all the git commits sorted by month and year | |
35 | |
36 Args: | |
37 cc(cursor) | |
38 | |
39 Return: | |
40 results(list): a list of lists e.g. [['2014-01', 20], ['2014-02', 45]] | |
41 """ | |
42 cc.execute("""SELECT DISTINCT DATE_FORMAT(git_commit.timestamp, '%Y-%m') | |
43 FROM git_commit""") | |
44 months = cc.fetchall() | |
45 results = [] | |
46 for month in months: | |
47 month = month[0] | |
48 cc.execute("""SELECT COUNT(*) | |
49 FROM git_commit | |
50 WHERE YEAR(git_commit.timestamp) = %s | |
51 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
52 result = cc.fetchone() | |
53 results.append([month, int(result[0])]) | |
54 return results | |
pgervais
2015/07/21 16:48:34
(not for this CL)
This can be done in SQL only, an
| |
55 | |
56 | |
57 def total_suspicious(cc): | |
58 """Counts the number of commits with no review url or TBRed with no lgtm | |
59 sorted by month and year | |
60 | |
61 Args: | |
62 cc(cursor) | |
63 | |
64 Return: | |
65 results(list): a list of lists | |
66 """ | |
67 cc.execute("""SELECT DISTINCT DATE_FORMAT(git_commit.timestamp, '%Y-%m') | |
68 FROM git_commit""") | |
69 months = cc.fetchall() | |
70 results = [] | |
71 for month in months: | |
72 month = month[0] | |
73 cc.execute("""SELECT COUNT(*) | |
74 FROM review | |
75 INNER JOIN git_commit | |
76 ON review.review_url = git_commit.review_url | |
77 INNER JOIN commit_people | |
78 ON commit_people.git_commit_hash = git_commit.hash | |
79 LEFT JOIN ( | |
80 SELECT review_url, COUNT(*) AS c | |
81 FROM review_people | |
82 WHERE type = 'lgtm' | |
83 GROUP BY review_url) lgtm_count | |
84 ON review.review_url = lgtm_count.review_url | |
85 WHERE lgtm_count.c = 0 OR lgtm_count.c IS NULL | |
86 AND commit_people.type = 'tbr' | |
87 AND YEAR(git_commit.timestamp) = %s | |
88 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
89 no_lgtm = cc.fetchone() | |
90 cc.execute("""SELECT COUNT(*) | |
91 FROM git_commit | |
92 WHERE review_url IS NULL | |
93 AND YEAR(git_commit.timestamp) = %s | |
94 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
95 no_review = cc.fetchone() | |
96 result = int(no_lgtm[0]) + int(no_review[0]) | |
97 results.append([month, result]) | |
98 return results | |
99 | |
100 | |
101 def total_tbr(cc): | |
102 """Counts the number of commits with a TBR | |
103 | |
104 Args: | |
105 cc(cursor) | |
106 | |
107 Return: | |
108 results(list): a list of lists | |
109 """ | |
110 cc.execute("""SELECT DISTINCT DATE_FORMAT(git_commit.timestamp, '%Y-%m') | |
111 FROM git_commit""") | |
112 months = cc.fetchall() | |
113 results = [] | |
114 for month in months: | |
115 month = month[0] | |
116 cc.execute("""SELECT COUNT(DISTINCT git_commit_hash) | |
117 FROM commit_people | |
118 INNER JOIN git_commit | |
119 ON commit_people.git_commit_hash = git_commit.hash | |
120 WHERE commit_people.type = 'tbr' | |
121 AND YEAR(git_commit.timestamp) = %s | |
122 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
123 result = cc.fetchone() | |
124 results.append([month, int(result[0])]) | |
125 return results | |
126 | |
127 | |
128 def tbr_no_lgtm(cc): | |
129 """Counts the number of commits with a TBR that have not been lgtm'ed | |
130 | |
131 Args: | |
132 cc(cursor) | |
133 | |
134 Return: | |
135 results(list): a list of lists | |
136 """ | |
137 cc.execute("""SELECT DISTINCT DATE_FORMAT(git_commit.timestamp, '%Y-%m') | |
138 FROM git_commit""") | |
139 months = cc.fetchall() | |
140 results = [] | |
141 for month in months: | |
142 month = month[0] | |
143 cc.execute("""SELECT COUNT(*) | |
144 FROM review | |
145 INNER JOIN git_commit ON review.review_url = git_commit.review_url | |
146 INNER JOIN commit_people | |
147 ON commit_people.git_commit_hash = git_commit.hash | |
148 LEFT JOIN ( | |
149 SELECT review_url, COUNT(*) AS c FROM review_people | |
150 WHERE type = 'lgtm' GROUP BY review_url) lgtm_count | |
151 ON review.review_url = lgtm_count.review_url | |
152 WHERE lgtm_count.c = 0 OR lgtm_count.c IS NULL | |
153 AND commit_people.type = 'tbr' | |
154 AND YEAR(git_commit.timestamp) = %s | |
155 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
156 result = cc.fetchone() | |
157 results.append([month, int(result[0])]) | |
158 return results | |
pgervais
2015/07/21 16:48:34
See my comment on total_commits above. You may be
| |
159 | |
160 | |
161 def blank_tbr(cc): | |
162 """Counts the number of occurences of TBR= with no reviewer listed | |
163 | |
164 Args: | |
165 cc(cursor) | |
166 | |
167 Return: | |
168 results(list): a list of lists | |
169 """ | |
170 cc.execute("""SELECT DISTINCT DATE_FORMAT(git_commit.timestamp, '%Y-%m') | |
171 FROM git_commit""") | |
172 months = cc.fetchall() | |
173 results = [] | |
174 for month in months: | |
175 month = month[0] | |
176 cc.execute("""SELECT COUNT(*) | |
177 FROM commit_people | |
178 INNER JOIN git_commit | |
179 ON commit_people.git_commit_hash = git_commit.hash | |
180 WHERE commit_people.people_email_address = 'NOBODY' | |
181 AND YEAR(git_commit.timestamp) = %s | |
182 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
183 result = cc.fetchone() | |
184 results.append([month, int(result[0])]) | |
185 return results | |
186 | |
187 | |
188 def no_review_url(cc): | |
189 """Counts the number of commits with no review url | |
190 | |
191 Args: | |
192 cc(cursor) | |
193 | |
194 Return: | |
195 results(list): a list of lists | |
196 """ | |
197 cc.execute("""SELECT DISTINCT DATE_FORMAT(git_commit.timestamp, '%Y-%m') | |
198 FROM git_commit""") | |
199 months = cc.fetchall() | |
200 results = [] | |
201 for month in months: | |
202 month = month[0] | |
203 cc.execute("""SELECT COUNT(*) | |
204 FROM git_commit | |
205 WHERE review_url IS NULL | |
206 AND YEAR(git_commit.timestamp) = %s | |
207 AND MONTH(git_commit.timestamp) = %s""" % (month[:4], month[5:])) | |
208 result = cc.fetchone() | |
209 results.append([month, int(result[0])]) | |
210 return results | |
211 | |
212 | |
213 # functions that return totaled stats for a set period back in time | |
214 def totaled_ratio_calculator(numerator, denominator): | |
215 """Computes the ratio of the counts in two lists | |
216 | |
217 Args: | |
218 numerator(int): a totaled count | |
219 denominator(int): a totaled count | |
220 | |
221 Return: | |
222 ratio(float): a ratio rounded to three decimal places | |
223 """ | |
224 ratio = round(float(numerator) / denominator, 3) | |
pgervais
2015/07/21 16:48:34
Take into account denominator == 0, as you've done
| |
225 return ratio | |
226 | |
227 | |
228 def totaled_total_commits(cc, sql_time_specification): | |
229 """Counts all the git commits in a given timeframe | |
230 | |
231 Args: | |
232 cc(cursor) | |
233 | |
pgervais
2015/07/21 16:48:34
Describe sql_time_specification (same below)
| |
234 Return: | |
235 result(int): a count of all the commits | |
236 """ | |
237 cc.execute("""SELECT COUNT(*) | |
238 FROM git_commit | |
239 WHERE %s""" % sql_time_specification) | |
240 result = cc.fetchone() | |
241 return int(result[0]) | |
242 | |
243 | |
244 def totaled_total_suspicious(cc, sql_time_specification): | |
245 """Counts the number of commits with no review url or TBRed with no lgtm | |
246 in a given timeframe | |
247 | |
248 Args: | |
249 cc(cursor) | |
250 | |
251 Return: | |
252 result(int): a count of all the suspicious commits | |
253 """ | |
254 cc.execute("""SELECT COUNT(*) | |
255 FROM review | |
256 INNER JOIN git_commit | |
257 ON review.review_url = git_commit.review_url | |
258 INNER JOIN commit_people | |
259 ON commit_people.git_commit_hash = git_commit.hash | |
260 LEFT JOIN ( | |
261 SELECT review_url, COUNT(*) AS c | |
262 FROM review_people | |
263 WHERE type = 'lgtm' | |
264 GROUP BY review_url) lgtm_count | |
265 ON review.review_url = lgtm_count.review_url | |
266 WHERE lgtm_count.c = 0 OR lgtm_count.c IS NULL | |
267 AND commit_people.type = 'tbr' AND %s""" % sql_time_specification) | |
268 no_lgtm = cc.fetchone() | |
269 cc.execute("""SELECT COUNT(*) | |
270 FROM git_commit | |
271 WHERE review_url IS NULL AND %s""" % sql_time_specification) | |
272 no_review = cc.fetchone() | |
273 result = int(no_lgtm[0]) + int(no_review[0]) | |
274 return result | |
275 | |
276 | |
277 def totaled_total_tbr(cc, sql_time_specification): | |
278 """Counts the total number of commits with a TBR in a given timeframe | |
279 | |
280 Args: | |
281 cc(cursor) | |
282 | |
283 Return: | |
284 result(int): a count of all commits with a TBR | |
285 """ | |
286 cc.execute("""SELECT COUNT(DISTINCT git_commit_hash) | |
287 FROM commit_people | |
288 INNER JOIN git_commit | |
289 ON commit_people.git_commit_hash = git_commit.hash | |
290 WHERE commit_people.type = 'tbr' AND %s""" % sql_time_specification) | |
291 result = cc.fetchone() | |
292 return int(result[0]) | |
293 | |
294 | |
295 def totaled_tbr_no_lgtm(cc, sql_time_specification): | |
296 """Counts the number of commits with a TBR that have not been lgtm'ed | |
297 in a given timeframe | |
298 | |
299 Args: | |
300 cc(cursor) | |
301 | |
302 Return: | |
303 result(int): a count of all commits with a TBR and no lgtm | |
304 """ | |
305 cc.execute("""SELECT COUNT(*) | |
306 FROM review | |
307 INNER JOIN git_commit | |
308 ON review.review_url = git_commit.review_url | |
309 INNER JOIN commit_people | |
310 ON commit_people.git_commit_hash = git_commit.hash | |
311 LEFT JOIN ( | |
312 SELECT review_url, COUNT(*) AS c | |
313 FROM review_people | |
314 WHERE type = 'lgtm' | |
315 GROUP BY review_url) lgtm_count | |
316 ON review.review_url = lgtm_count.review_url | |
317 WHERE lgtm_count.c = 0 OR lgtm_count.c IS NULL | |
318 AND commit_people.type = 'tbr' AND %s""" % sql_time_specification) | |
319 result = cc.fetchone() | |
320 return int(result[0]) | |
321 | |
322 | |
323 def totaled_blank_tbr(cc, sql_time_specification): | |
324 """Counts the number of occurences of TBR= with no reviewer listed in a | |
325 given timeframe | |
326 | |
327 Args: | |
328 cc(cursor) | |
329 | |
330 Return: | |
331 result(int): a count of all blank TBRs (TBR=) | |
332 """ | |
333 cc.execute("""SELECT COUNT(*) | |
334 FROM commit_people | |
335 INNER JOIN git_commit | |
336 ON commit_people.git_commit_hash = git_commit.hash | |
337 WHERE commit_people.people_email_address = 'NOBODY' | |
338 AND %s""" % sql_time_specification) | |
339 result = cc.fetchone() | |
340 return int(result[0]) | |
341 | |
342 | |
343 def totaled_no_review_url(cc, sql_time_specification): | |
344 """Counts the number of commits with no review url in a given timeframe | |
345 | |
346 Args: | |
347 cc(cursor) | |
348 | |
349 Return: | |
350 result(int): a count of all commits with no review_url | |
351 """ | |
352 cc.execute("""SELECT COUNT(*) | |
353 FROM git_commit | |
354 WHERE review_url IS NULL AND %s""" % sql_time_specification) | |
355 result = cc.fetchone() | |
356 return int(result[0]) | |
357 | |
358 | |
359 # TODO(keelerh): change the SQL query to avoid using temporary tables | |
360 # add time specification back in | |
361 def tbr_to_total_people_ratio(cc, sql_time_specification=''): | |
362 """Calculate TBR stats by author for the Antibody leaderboard and writes | |
363 them to a json file | |
364 | |
365 Args: | |
366 cc(cursor) | |
367 | |
368 Return: | |
369 people_tbr_data(list): a list of dictionaries containing author email, | |
370 ratio of TBRed commits to total commits, number of | |
371 TBRed commits, total number of commits, and overall | |
372 rank, reverse sorted by ratio | |
373 """ | |
374 cc.execute("""CREATE TEMPORARY TABLE addr_tbr_hash (email VARCHAR(200), tbr | |
pgervais
2015/07/21 16:48:34
If the insert statement fails, the table will rema
| |
375 INT, hash VARCHAR(40), `timestamp` TIMESTAMP)""") | |
376 cc.execute("""INSERT INTO addr_tbr_hash (email, tbr, hash) | |
377 SELECT commit_people.people_email_address, tbr_count.c, | |
378 commit_people.git_commit_hash FROM commit_people | |
379 LEFT JOIN ( | |
380 SELECT git_commit_hash, COUNT(*) | |
381 AS c FROM commit_people | |
382 INNER JOIN git_commit | |
383 ON git_commit.hash = commit_people.git_commit_hash | |
384 WHERE commit_people.type='tbr' %s | |
385 GROUP BY git_commit_hash) tbr_count | |
386 ON commit_people.git_commit_hash = tbr_count.git_commit_hash | |
387 WHERE commit_people.type='author'""" % sql_time_specification) | |
388 cc.execute("""SELECT email, SUM(CASE WHEN tbr<>0 THEN 1 ELSE 0 END) num_tbrd, | |
389 COUNT(*) | |
390 AS num_total | |
391 FROM addr_tbr_hash | |
392 GROUP BY email""") | |
393 people_tbr_data = cc.fetchall() | |
394 people_tbr_data = [[data_item[0], | |
395 round(float(data_item[1]) / data_item[2], 3), | |
396 int(data_item[1]), int(data_item[2])] for data_item in | |
397 people_tbr_data] | |
398 sorted_people_tbr_data = sorted(people_tbr_data, key=lambda x: x[1], | |
399 reverse=True) | |
400 top_100 = sorted_people_tbr_data[:100] | |
401 ordered_people_tbr_data = [] | |
402 for i in range(len(top_100)): | |
403 temp = {} | |
404 temp['email'] = sorted_people_tbr_data[i][0] | |
405 temp['ratio'] = sorted_people_tbr_data[i][1] | |
406 temp['suspicious'] = sorted_people_tbr_data[i][2] | |
407 temp['total'] = sorted_people_tbr_data[i][3] | |
408 temp['rank'] = i | |
409 ordered_people_tbr_data.append(temp) | |
410 cc.execute("""DROP TABLE addr_tbr_hash""") | |
411 return ordered_people_tbr_data | |
412 | |
413 | |
414 def compute_monthly_breakdown_stats(cc): | |
415 """Computes stats broken down by month | |
416 | |
417 Args: | |
418 cc(cursor) | |
419 | |
420 Returns: | |
421 monthly_breakdown_stats(dict): contains stats calculated per month | |
422 including: timeframe, total_commits, | |
423 total_suspicious, suspicious_to_total_ratio, | |
424 total_tbr, tbr_no_lgtm, | |
425 no_tbr_lgtm_to_total_ratio, blank_tbr, | |
426 blank_tbr_to_total_ratio, no_review_url, | |
427 no_review_url_to_total_ratio | |
428 """ | |
429 monthly_breakdown_stats = {} | |
430 monthly_breakdown_stats['timeframe'] = 'monthly_breakdown' | |
431 tot_commits = total_commits(cc) | |
432 monthly_breakdown_stats['total_commits'] = tot_commits | |
433 tot_suspicious = total_suspicious(cc) | |
434 monthly_breakdown_stats['total_suspicious'] = tot_suspicious | |
435 monthly_breakdown_stats['suspicious_to_total_ratio'] = ratio_calculator( | |
436 tot_suspicious, tot_commits) | |
437 tot_tbr = total_tbr(cc) | |
438 monthly_breakdown_stats['total_tbr'] = tot_tbr | |
439 tot_tbr_no_lgtm = tbr_no_lgtm(cc) | |
440 monthly_breakdown_stats['tbr_no_lgtm'] = tot_tbr_no_lgtm | |
441 monthly_breakdown_stats['no_tbr_lgtm_to_total_ratio'] = ratio_calculator( | |
442 tot_tbr_no_lgtm, tot_tbr) | |
443 tot_blank_tbr = blank_tbr(cc) | |
444 monthly_breakdown_stats['blank_tbrs'] = tot_blank_tbr | |
445 monthly_breakdown_stats['blank_tbr_to_total_ratio'] = ratio_calculator( | |
446 tot_blank_tbr, tot_tbr) | |
447 tot_no_review_url = no_review_url(cc) | |
448 monthly_breakdown_stats['no_review_url'] = tot_no_review_url | |
449 monthly_breakdown_stats['no_review_url_to_total_ratio'] = ratio_calculator( | |
450 tot_no_review_url, tot_commits) | |
451 return monthly_breakdown_stats | |
452 | |
453 | |
454 def compute_stats_by_time(cc): | |
455 """Computes the stats for the past 7 days, past 30 days, and all time | |
456 | |
457 Args: | |
458 cc(cursor) | |
459 | |
460 Returns: | |
461 output(list): three dictionaries containing stats for the past 7 days, 30 | |
462 days, and all time respectively, each including timeframe, | |
463 suspicious_to_total_ratio, total_commits, tbr_no_lgtm, | |
464 no_review_url, blank_tbr | |
465 """ | |
466 stats_7_days, stats_30_days, stats_all_time = {}, {}, {} | |
467 for d in [stats_7_days, stats_30_days, stats_all_time]: | |
468 if d == stats_7_days: | |
469 d['timeframe'] = '7_days' | |
pgervais
2015/07/21 16:48:34
Do that on line 466:
stats_7_days = {'timeframe':
| |
470 sql_insert = 'DATEDIFF(NOW(), git_commit.timestamp) <= 7' | |
pgervais
2015/07/21 16:48:34
Just set the value instead of writing a where clau
keelerh
2015/07/22 21:17:55
I was initially going to do that, but then didn't
| |
471 elif d == stats_30_days: | |
472 d['timeframe'] = '30_days' | |
473 sql_insert = 'DATEDIFF(NOW(), git_commit.timestamp) <= 30' | |
474 elif d == stats_all_time: | |
475 d['timeframe'] = 'all_time' | |
476 sql_insert = 'DATEDIFF(git_commit.timestamp, NOW()) < 0' | |
477 tot_commits = totaled_total_commits(cc, sql_insert) | |
478 tot_suspicious = totaled_total_suspicious(cc, sql_insert) | |
479 d['suspicious_to_total_ratio'] = totaled_ratio_calculator(tot_suspicious, | |
480 tot_commits) | |
481 d['total_commits'] = tot_commits | |
482 d['tbr_no_lgtm'] = totaled_tbr_no_lgtm(cc, sql_insert) | |
483 d['no_review_url'] = totaled_no_review_url(cc, sql_insert) | |
484 d['blank_tbr'] = totaled_blank_tbr(cc, sql_insert) | |
485 output = [stats_7_days, stats_30_days, stats_all_time] | |
486 return output | |
487 | |
488 | |
489 def all_monthly_stats(cc, filename): | |
490 """Write stats calculated over different time segments as json to file | |
491 | |
492 Args: | |
493 cc(cursor) | |
494 filename(str): the json file to write to | |
495 """ | |
496 output = {} | |
497 stats_monthly_breakdown = compute_monthly_breakdown_stats(cc) | |
498 stats_by_time = compute_stats_by_time(cc) | |
499 stats_by_time.append(stats_monthly_breakdown) | |
500 for d in stats_by_time: | |
501 label = d['timeframe'] | |
502 output[label] = d | |
503 with open(filename, 'w') as f: | |
504 json.dump(output, f) | |
505 | |
506 | |
507 def all_time_leaderboard(cc, filename): | |
508 """Writes stats for all-time Antibody leaderboard to a json file | |
509 | |
510 Args: | |
511 cc(cursor) | |
512 filename(str): the json file to write to | |
513 """ | |
514 output = tbr_to_total_people_ratio(cc) | |
515 with open(filename, 'w') as f: | |
516 json.dump(output, f) | |
517 | |
518 | |
519 def past_month_leaderboard(cc, filename): | |
520 """Writes stats for the past month Antibody leaderboard to a json file | |
521 | |
522 Args: | |
523 cc(cursor) | |
524 filename(str): the json file to write to | |
525 """ | |
526 sql_time_specification = 'AND DATEDIFF(NOW(), git_commit.timestamp) <= 30' | |
527 output = tbr_to_total_people_ratio(cc, sql_time_specification) | |
528 with open(filename, 'w') as f: | |
529 json.dump(output, f) | |
OLD | NEW |