OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import infra.tools.antibody.cloudsql_connect as csql | |
6 | |
7 | |
8 def totaler(counts_by_date): | |
9 """Totals the values in a list of lists | |
10 | |
11 Args: | |
12 counts_by_date(list): a list of two-item lists where the first index is a | |
13 month and year and the second is a count | |
14 | |
15 Return: | |
16 total(int): the sum of the values | |
17 """ | |
18 total = sum(i[1] for i in counts_by_date) | |
19 return total | |
20 | |
21 | |
22 def ratio_calculator(numerator, denominator): | |
23 """Computes the ratio of the counts in two lists | |
24 | |
25 Args: | |
26 numerator(list): list of lists with a month-year string as the first index | |
27 and a count as the second index | |
28 denominator(list): list of lists with a month-year string as the first index | |
29 and a count as the second index | |
30 | |
31 Return: | |
32 ratios(list): a list of lists with ratios rounded to three decimal places | |
33 e.g. [['1-2015', .251], ['10-2014', .742]] | |
Sergey Berezin
2015/07/16 15:23:55
nit: in most other systems, we usually use YYYY-MM
hinoka
2015/07/17 22:57:46
+1
| |
34 """ | |
35 ratios = [] | |
36 for i in numerator: | |
37 for j in denominator: | |
38 if i[0] == j[0] and j[1] != 0: | |
39 ratios.append([i[0], round(float(i[1]) / j[1], 3)]) | |
Sergey Berezin
2015/07/16 15:23:55
nit: beware of floating point representation being
| |
40 break | |
41 elif i[0] == j[0]: | |
42 ratios.append([i[0], 0]) | |
43 return ratios | |
44 | |
45 | |
46 def total_commits(cc): | |
47 """Counts all the git commits sorted by month and year | |
48 | |
49 Args: | |
50 cc(cursor) | |
51 | |
52 Return: | |
53 results(list): a list of lists e.g. [['1-2014', 20], ['2-2014', 45]] | |
54 """ | |
55 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM | |
56 git_commit""") | |
57 years = cc.fetchall() | |
58 results = [] | |
59 for year in years: | |
60 for month in xrange(1, 13): | |
61 cc.execute("""SELECT COUNT(*) FROM git_commit | |
62 WHERE YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" | |
63 % (year[0], month)) | |
64 result = cc.fetchone() | |
65 results.append(['%s-%s' % (month, year[0]), int(result[0])]) | |
66 return results | |
67 | |
68 | |
69 def number_of_suspicious_commits(cc): | |
70 """Counts the number of commits with no review url or TBRed with no lgtm | |
71 sorted by month and year | |
72 | |
73 Args: | |
74 cc(cursor) | |
75 | |
76 Return: | |
77 results(list): a list of lists | |
78 """ | |
79 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM | |
80 git_commit AS `timestamp`""") | |
81 years = cc.fetchall() | |
82 results = [] | |
83 for year in years: | |
84 for month in xrange(1, 13): | |
85 cc.execute("""SELECT COUNT(*) FROM review | |
86 INNER JOIN git_commit on review.review_url = git_commit.review_url | |
87 INNER JOIN commit_people on commit_people.git_commit_hash = | |
88 git_commit.hash | |
89 LEFT JOIN (SELECT review_url, COUNT(*) AS c FROM review_people | |
90 WHERE type = 'lgtm' GROUP BY review_url) lgtm_count on | |
91 review.review_url = lgtm_count.review_url WHERE | |
92 lgtm_count.c = 0 OR lgtm_count.c IS NULL AND commit_people.type = | |
93 'tbr' AND YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" % (year[0], | |
94 month)) | |
95 result = cc.fetchone() | |
96 results.append(['%s-%s' % (month, year[0]), int(result[0])]) | |
97 return results | |
98 | |
99 | |
100 def number_of_blank_tbrs(cc): | |
101 """Counts the number of occurences of TBR= with no reviewer listed | |
102 | |
103 Args: | |
104 cc(cursor) | |
105 | |
106 Return: | |
107 results(list): a list of lists | |
108 """ | |
109 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM | |
110 git_commit AS `timestamp`""") | |
111 years = cc.fetchall() | |
112 results = [] | |
113 for year in years: | |
114 for month in xrange(1, 13): | |
115 cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit | |
116 ON commit_people.git_commit_hash = git_commit.hash | |
117 WHERE commit_people.people_email_address = 'NOBODY' AND | |
118 YEAR(git_commit.timestamp) = %s AND | |
119 MONTH(git_commit.timestamp) = %s""" % (year[0], month)) | |
120 result = cc.fetchone() | |
121 results.append(['%s-%s' % (month, year[0]), int(result[0])]) | |
122 return results | |
123 | |
124 | |
125 def number_of_tbrs(cc): | |
126 """Counts the number of commits with a TBR | |
127 | |
128 Args: | |
129 cc(cursor) | |
130 | |
131 Return: | |
132 results(list): a list of lists | |
133 """ | |
134 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM | |
135 git_commit AS `timestamp`""") | |
136 years = cc.fetchall() | |
137 results = [] | |
138 for year in years: | |
139 for month in xrange(1, 13): | |
140 cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit | |
141 ON commit_people.git_commit_hash = git_commit.hash | |
142 WHERE commit_people.type = 'tbr' AND | |
143 YEAR(git_commit.timestamp) = %s AND | |
144 MONTH(git_commit.timestamp) = %s""" % (year[0], month)) | |
145 result = cc.fetchone() | |
146 results.append(['%s-%s' % (month, year[0]), int(result[0])]) | |
147 return results | |
148 | |
149 | |
150 # TODO(ksho): add doc string to function | |
151 def get_people_ratio_tbr_total(cc): | |
152 cc.execute("""CREATE TEMPORARY TABLE addr_tbr_hash (email VARCHAR(200), tbr | |
153 INT, hash VARCHAR(40))""") | |
154 cc.execute("""INSERT INTO addr_tbr_hash (email, tbr, hash) | |
hinoka
2015/07/17 22:57:46
Lets format this with indents, so it looks like
IN
| |
155 SELECT commit_people.people_email_address, tbr_count.c, | |
156 commit_people.git_commit_hash FROM commit_people | |
157 LEFT JOIN (SELECT git_commit_hash, count(*) AS c FROM commit_people | |
158 WHERE type='tbr' GROUP BY git_commit_hash) tbr_count | |
159 ON commit_people.git_commit_hash = tbr_count.git_commit_hash | |
160 WHERE commit_people.type='author'""") | |
161 cc.execute("""SELECT email, SUM(CASE WHEN tbr<>0 THEN 1 ELSE 0 END) | |
hinoka
2015/07/17 22:57:46
I assume you created a new table to do aggregation
| |
162 num_tbrd, COUNT(*) AS num_total FROM addr_tbr_hash GROUP BY email""") | |
163 # list of [email, num tbr'd commits, num total commits] | |
164 people_tbr_data = cc.fetchall() | |
165 people_tbr_data = [[data_item[0], int(data_item[1]), int(data_item[2])] | |
166 for data_item in people_tbr_data] | |
167 cc.execute("""DROP TABLE addr_tbr_hash""") | |
168 return people_tbr_data | |
169 | |
170 | |
171 def compute_all_stats(cc): | |
172 all_commits_by_date = total_commits(cc) | |
173 total_num_commits = totaler(all_commits_by_date) | |
174 suspicious_commits_by_date = number_of_suspicious_commits(cc) | |
175 all_suspicious_commits = totaler(suspicious_commits_by_date) | |
176 ratio_all_to_suspicious = ratio_calculator(suspicious_commits_by_date, | |
177 all_commits_by_date) | |
178 number_of_blank_tbrs_by_date = number_of_blank_tbrs(cc) | |
179 total_blank_tbrs = totaler(number_of_blank_tbrs_by_date) | |
180 number_of_tbrs_by_date = number_of_tbrs(cc) | |
181 total_num_tbrs = totaler(number_of_tbrs_by_date) | |
182 ratio_blank_to_all_tbrs = ratio_calculator(number_of_blank_tbrs_by_date, | |
183 number_of_tbrs_by_date) | |
184 people_ratio_tbr_total = get_people_ratio_tbr_total(cc) | |
185 return all_commits_by_date, total_num_commits, suspicious_commits_by_date, | |
Sergey Berezin
2015/07/16 15:23:55
nit: consider returning a map rather than a tuple,
| |
186 all_suspicious_commits, ratio_all_to_suspicious, | |
187 number_of_blank_tbrs_by_date, total_blank_tbrs, | |
188 number_of_tbrs_by_date, total_blank_tbrs, number_of_tbrs_by_date, | |
189 total_num_tbrs, ratio_blank_to_all_tbrs | |
OLD | NEW |