Index: infra/tools/antibody/compute_stats.py |
diff --git a/infra/tools/antibody/compute_stats.py b/infra/tools/antibody/compute_stats.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..db0c94dec9abf5991dceae456d611f7a739b4fbf |
--- /dev/null |
+++ b/infra/tools/antibody/compute_stats.py |
@@ -0,0 +1,189 @@ |
+# Copyright 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import infra.tools.antibody.cloudsql_connect as csql |
+ |
+ |
+def totaler(counts_by_date): |
+ """Totals the values in a list of lists |
+ |
+ Args: |
+ counts_by_date(list): a list of two-item lists where the first index is a |
+ month and year and the second is a count |
+ |
+ Return: |
+ total(int): the sum of the values |
+ """ |
+ total = sum(i[1] for i in counts_by_date) |
+ return total |
+ |
+ |
+def ratio_calculator(numerator, denominator): |
+ """Computes the ratio of the counts in two lists |
+ |
+ Args: |
+ numerator(list): list of lists with a month-year string as the first index |
+ and a count as the second index |
+ denominator(list): list of lists with a month-year string as the first index |
+ and a count as the second index |
+ |
+ Return: |
+ ratios(list): a list of lists with ratios rounded to three decimal places |
+ e.g. [['1-2015', .251], ['10-2014', .742]] |
Sergey Berezin
2015/07/16 15:23:55
nit: in most other systems, we usually use YYYY-MM
hinoka
2015/07/17 22:57:46
+1
|
+ """ |
+ ratios = [] |
+ for i in numerator: |
+ for j in denominator: |
+ if i[0] == j[0] and j[1] != 0: |
+ ratios.append([i[0], round(float(i[1]) / j[1], 3)]) |
Sergey Berezin
2015/07/16 15:23:55
nit: beware of floating point representation being
|
+ break |
+ elif i[0] == j[0]: |
+ ratios.append([i[0], 0]) |
+ return ratios |
+ |
+ |
+def total_commits(cc): |
+ """Counts all the git commits sorted by month and year |
+ |
+ Args: |
+ cc(cursor) |
+ |
+ Return: |
+ results(list): a list of lists e.g. [['1-2014', 20], ['2-2014', 45]] |
+ """ |
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
+ git_commit""") |
+ years = cc.fetchall() |
+ results = [] |
+ for year in years: |
+ for month in xrange(1, 13): |
+ cc.execute("""SELECT COUNT(*) FROM git_commit |
+ WHERE YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" |
+ % (year[0], month)) |
+ result = cc.fetchone() |
+ results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
+ return results |
+ |
+ |
+def number_of_suspicious_commits(cc): |
+ """Counts the number of commits with no review url or TBRed with no lgtm |
+ sorted by month and year |
+ |
+ Args: |
+ cc(cursor) |
+ |
+ Return: |
+ results(list): a list of lists |
+ """ |
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
+ git_commit AS `timestamp`""") |
+ years = cc.fetchall() |
+ results = [] |
+ for year in years: |
+ for month in xrange(1, 13): |
+ cc.execute("""SELECT COUNT(*) FROM review |
+ INNER JOIN git_commit on review.review_url = git_commit.review_url |
+ INNER JOIN commit_people on commit_people.git_commit_hash = |
+ git_commit.hash |
+ LEFT JOIN (SELECT review_url, COUNT(*) AS c FROM review_people |
+ WHERE type = 'lgtm' GROUP BY review_url) lgtm_count on |
+ review.review_url = lgtm_count.review_url WHERE |
+ lgtm_count.c = 0 OR lgtm_count.c IS NULL AND commit_people.type = |
+ 'tbr' AND YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" % (year[0], |
+ month)) |
+ result = cc.fetchone() |
+ results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
+ return results |
+ |
+ |
+def number_of_blank_tbrs(cc): |
+ """Counts the number of occurences of TBR= with no reviewer listed |
+ |
+ Args: |
+ cc(cursor) |
+ |
+ Return: |
+ results(list): a list of lists |
+ """ |
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
+ git_commit AS `timestamp`""") |
+ years = cc.fetchall() |
+ results = [] |
+ for year in years: |
+ for month in xrange(1, 13): |
+ cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit |
+ ON commit_people.git_commit_hash = git_commit.hash |
+ WHERE commit_people.people_email_address = 'NOBODY' AND |
+ YEAR(git_commit.timestamp) = %s AND |
+ MONTH(git_commit.timestamp) = %s""" % (year[0], month)) |
+ result = cc.fetchone() |
+ results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
+ return results |
+ |
+ |
+def number_of_tbrs(cc): |
+ """Counts the number of commits with a TBR |
+ |
+ Args: |
+ cc(cursor) |
+ |
+ Return: |
+ results(list): a list of lists |
+ """ |
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
+ git_commit AS `timestamp`""") |
+ years = cc.fetchall() |
+ results = [] |
+ for year in years: |
+ for month in xrange(1, 13): |
+ cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit |
+ ON commit_people.git_commit_hash = git_commit.hash |
+ WHERE commit_people.type = 'tbr' AND |
+ YEAR(git_commit.timestamp) = %s AND |
+ MONTH(git_commit.timestamp) = %s""" % (year[0], month)) |
+ result = cc.fetchone() |
+ results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
+ return results |
+ |
+ |
+# TODO(ksho): add doc string to function |
+def get_people_ratio_tbr_total(cc): |
+ cc.execute("""CREATE TEMPORARY TABLE addr_tbr_hash (email VARCHAR(200), tbr |
+ INT, hash VARCHAR(40))""") |
+ cc.execute("""INSERT INTO addr_tbr_hash (email, tbr, hash) |
hinoka
2015/07/17 22:57:46
Lets format this with indents, so it looks like
IN
|
+ SELECT commit_people.people_email_address, tbr_count.c, |
+ commit_people.git_commit_hash FROM commit_people |
+ LEFT JOIN (SELECT git_commit_hash, count(*) AS c FROM commit_people |
+ WHERE type='tbr' GROUP BY git_commit_hash) tbr_count |
+ ON commit_people.git_commit_hash = tbr_count.git_commit_hash |
+ WHERE commit_people.type='author'""") |
+ cc.execute("""SELECT email, SUM(CASE WHEN tbr<>0 THEN 1 ELSE 0 END) |
hinoka
2015/07/17 22:57:46
I assume you created a new table to do aggregation
|
+ num_tbrd, COUNT(*) AS num_total FROM addr_tbr_hash GROUP BY email""") |
+ # list of [email, num tbr'd commits, num total commits] |
+ people_tbr_data = cc.fetchall() |
+ people_tbr_data = [[data_item[0], int(data_item[1]), int(data_item[2])] |
+ for data_item in people_tbr_data] |
+ cc.execute("""DROP TABLE addr_tbr_hash""") |
+ return people_tbr_data |
+ |
+ |
+def compute_all_stats(cc): |
+ all_commits_by_date = total_commits(cc) |
+ total_num_commits = totaler(all_commits_by_date) |
+ suspicious_commits_by_date = number_of_suspicious_commits(cc) |
+ all_suspicious_commits = totaler(suspicious_commits_by_date) |
+ ratio_all_to_suspicious = ratio_calculator(suspicious_commits_by_date, |
+ all_commits_by_date) |
+ number_of_blank_tbrs_by_date = number_of_blank_tbrs(cc) |
+ total_blank_tbrs = totaler(number_of_blank_tbrs_by_date) |
+ number_of_tbrs_by_date = number_of_tbrs(cc) |
+ total_num_tbrs = totaler(number_of_tbrs_by_date) |
+ ratio_blank_to_all_tbrs = ratio_calculator(number_of_blank_tbrs_by_date, |
+ number_of_tbrs_by_date) |
+ people_ratio_tbr_total = get_people_ratio_tbr_total(cc) |
+ return all_commits_by_date, total_num_commits, suspicious_commits_by_date, |
Sergey Berezin
2015/07/16 15:23:55
nit: consider returning a map rather than a tuple,
|
+ all_suspicious_commits, ratio_all_to_suspicious, |
+ number_of_blank_tbrs_by_date, total_blank_tbrs, |
+ number_of_tbrs_by_date, total_blank_tbrs, number_of_tbrs_by_date, |
+ total_num_tbrs, ratio_blank_to_all_tbrs |