Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(224)

Side by Side Diff: infra/tools/antibody/compute_stats.py

Issue 1235373004: Added script to generate stats on a git checkout (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@new_antibody_db_schema
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | infra/tools/antibody/git_commit_parser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import infra.tools.antibody.cloudsql_connect as csql
6
7
8 def totaler(counts_by_date):
9 """Totals the values in a list of lists
10
11 Args:
12 counts_by_date(list): a list of two-item lists where the first index is a
13 month and year and the second is a count
14
15 Return:
16 total(int): the sum of the values
17 """
18 total = sum(i[1] for i in counts_by_date)
19 return total
20
21
22 def ratio_calculator(numerator, denominator):
23 """Computes the ratio of the counts in two lists
24
25 Args:
26 numerator(list): list of lists with a month-year string as the first index
27 and a count as the second index
28 denominator(list): list of lists with a month-year string as the first index
29 and a count as the second index
30
31 Return:
32 ratios(list): a list of lists with ratios rounded to three decimal places
33 e.g. [['1-2015', .251], ['10-2014', .742]]
Sergey Berezin 2015/07/16 15:23:55 nit: in most other systems, we usually use YYYY-MM
hinoka 2015/07/17 22:57:46 +1
34 """
35 ratios = []
36 for i in numerator:
37 for j in denominator:
38 if i[0] == j[0] and j[1] != 0:
39 ratios.append([i[0], round(float(i[1]) / j[1], 3)])
Sergey Berezin 2015/07/16 15:23:55 nit: beware of floating point representation being
40 break
41 elif i[0] == j[0]:
42 ratios.append([i[0], 0])
43 return ratios
44
45
46 def total_commits(cc):
47 """Counts all the git commits sorted by month and year
48
49 Args:
50 cc(cursor)
51
52 Return:
53 results(list): a list of lists e.g. [['1-2014', 20], ['2-2014', 45]]
54 """
55 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
56 git_commit""")
57 years = cc.fetchall()
58 results = []
59 for year in years:
60 for month in xrange(1, 13):
61 cc.execute("""SELECT COUNT(*) FROM git_commit
62 WHERE YEAR(timestamp) = %s AND MONTH(timestamp) = %s"""
63 % (year[0], month))
64 result = cc.fetchone()
65 results.append(['%s-%s' % (month, year[0]), int(result[0])])
66 return results
67
68
69 def number_of_suspicious_commits(cc):
70 """Counts the number of commits with no review url or TBRed with no lgtm
71 sorted by month and year
72
73 Args:
74 cc(cursor)
75
76 Return:
77 results(list): a list of lists
78 """
79 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
80 git_commit AS `timestamp`""")
81 years = cc.fetchall()
82 results = []
83 for year in years:
84 for month in xrange(1, 13):
85 cc.execute("""SELECT COUNT(*) FROM review
86 INNER JOIN git_commit on review.review_url = git_commit.review_url
87 INNER JOIN commit_people on commit_people.git_commit_hash =
88 git_commit.hash
89 LEFT JOIN (SELECT review_url, COUNT(*) AS c FROM review_people
90 WHERE type = 'lgtm' GROUP BY review_url) lgtm_count on
91 review.review_url = lgtm_count.review_url WHERE
92 lgtm_count.c = 0 OR lgtm_count.c IS NULL AND commit_people.type =
93 'tbr' AND YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" % (year[0],
94 month))
95 result = cc.fetchone()
96 results.append(['%s-%s' % (month, year[0]), int(result[0])])
97 return results
98
99
100 def number_of_blank_tbrs(cc):
101 """Counts the number of occurences of TBR= with no reviewer listed
102
103 Args:
104 cc(cursor)
105
106 Return:
107 results(list): a list of lists
108 """
109 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
110 git_commit AS `timestamp`""")
111 years = cc.fetchall()
112 results = []
113 for year in years:
114 for month in xrange(1, 13):
115 cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit
116 ON commit_people.git_commit_hash = git_commit.hash
117 WHERE commit_people.people_email_address = 'NOBODY' AND
118 YEAR(git_commit.timestamp) = %s AND
119 MONTH(git_commit.timestamp) = %s""" % (year[0], month))
120 result = cc.fetchone()
121 results.append(['%s-%s' % (month, year[0]), int(result[0])])
122 return results
123
124
125 def number_of_tbrs(cc):
126 """Counts the number of commits with a TBR
127
128 Args:
129 cc(cursor)
130
131 Return:
132 results(list): a list of lists
133 """
134 cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
135 git_commit AS `timestamp`""")
136 years = cc.fetchall()
137 results = []
138 for year in years:
139 for month in xrange(1, 13):
140 cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit
141 ON commit_people.git_commit_hash = git_commit.hash
142 WHERE commit_people.type = 'tbr' AND
143 YEAR(git_commit.timestamp) = %s AND
144 MONTH(git_commit.timestamp) = %s""" % (year[0], month))
145 result = cc.fetchone()
146 results.append(['%s-%s' % (month, year[0]), int(result[0])])
147 return results
148
149
150 # TODO(ksho): add doc string to function
151 def get_people_ratio_tbr_total(cc):
152 cc.execute("""CREATE TEMPORARY TABLE addr_tbr_hash (email VARCHAR(200), tbr
153 INT, hash VARCHAR(40))""")
154 cc.execute("""INSERT INTO addr_tbr_hash (email, tbr, hash)
hinoka 2015/07/17 22:57:46 Lets format this with indents, so it looks like IN
155 SELECT commit_people.people_email_address, tbr_count.c,
156 commit_people.git_commit_hash FROM commit_people
157 LEFT JOIN (SELECT git_commit_hash, count(*) AS c FROM commit_people
158 WHERE type='tbr' GROUP BY git_commit_hash) tbr_count
159 ON commit_people.git_commit_hash = tbr_count.git_commit_hash
160 WHERE commit_people.type='author'""")
161 cc.execute("""SELECT email, SUM(CASE WHEN tbr<>0 THEN 1 ELSE 0 END)
hinoka 2015/07/17 22:57:46 I assume you created a new table to do aggregation
162 num_tbrd, COUNT(*) AS num_total FROM addr_tbr_hash GROUP BY email""")
163 # list of [email, num tbr'd commits, num total commits]
164 people_tbr_data = cc.fetchall()
165 people_tbr_data = [[data_item[0], int(data_item[1]), int(data_item[2])]
166 for data_item in people_tbr_data]
167 cc.execute("""DROP TABLE addr_tbr_hash""")
168 return people_tbr_data
169
170
171 def compute_all_stats(cc):
172 all_commits_by_date = total_commits(cc)
173 total_num_commits = totaler(all_commits_by_date)
174 suspicious_commits_by_date = number_of_suspicious_commits(cc)
175 all_suspicious_commits = totaler(suspicious_commits_by_date)
176 ratio_all_to_suspicious = ratio_calculator(suspicious_commits_by_date,
177 all_commits_by_date)
178 number_of_blank_tbrs_by_date = number_of_blank_tbrs(cc)
179 total_blank_tbrs = totaler(number_of_blank_tbrs_by_date)
180 number_of_tbrs_by_date = number_of_tbrs(cc)
181 total_num_tbrs = totaler(number_of_tbrs_by_date)
182 ratio_blank_to_all_tbrs = ratio_calculator(number_of_blank_tbrs_by_date,
183 number_of_tbrs_by_date)
184 people_ratio_tbr_total = get_people_ratio_tbr_total(cc)
185 return all_commits_by_date, total_num_commits, suspicious_commits_by_date,
Sergey Berezin 2015/07/16 15:23:55 nit: consider returning a map rather than a tuple,
186 all_suspicious_commits, ratio_all_to_suspicious,
187 number_of_blank_tbrs_by_date, total_blank_tbrs,
188 number_of_tbrs_by_date, total_blank_tbrs, number_of_tbrs_by_date,
189 total_num_tbrs, ratio_blank_to_all_tbrs
OLDNEW
« no previous file with comments | « no previous file | infra/tools/antibody/git_commit_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698