Chromium Code Reviews| Index: infra/tools/antibody/compute_stats.py |
| diff --git a/infra/tools/antibody/compute_stats.py b/infra/tools/antibody/compute_stats.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..db0c94dec9abf5991dceae456d611f7a739b4fbf |
| --- /dev/null |
| +++ b/infra/tools/antibody/compute_stats.py |
| @@ -0,0 +1,189 @@ |
| +# Copyright 2015 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import infra.tools.antibody.cloudsql_connect as csql |
| + |
| + |
| +def totaler(counts_by_date): |
| + """Totals the values in a list of lists |
| + |
| + Args: |
| + counts_by_date(list): a list of two-item lists where the first index is a |
| + month and year and the second is a count |
| + |
| + Return: |
| + total(int): the sum of the values |
| + """ |
| + total = sum(i[1] for i in counts_by_date) |
| + return total |
| + |
| + |
| +def ratio_calculator(numerator, denominator): |
| + """Computes the ratio of the counts in two lists |
| + |
| + Args: |
| + numerator(list): list of lists with a month-year string as the first index |
| + and a count as the second index |
| + denominator(list): list of lists with a month-year string as the first index |
| + and a count as the second index |
| + |
| + Return: |
| + ratios(list): a list of lists with ratios rounded to three decimal places |
| + e.g. [['1-2015', .251], ['10-2014', .742]] |
|
Sergey Berezin
2015/07/16 15:23:55
nit: in most other systems, we usually use YYYY-MM
hinoka
2015/07/17 22:57:46
+1
|
| + """ |
| + ratios = [] |
| + for i in numerator: |
| + for j in denominator: |
| + if i[0] == j[0] and j[1] != 0: |
| + ratios.append([i[0], round(float(i[1]) / j[1], 3)]) |
|
Sergey Berezin
2015/07/16 15:23:55
nit: beware of floating point representation being
|
| + break |
| + elif i[0] == j[0]: |
| + ratios.append([i[0], 0]) |
| + return ratios |
| + |
| + |
| +def total_commits(cc): |
| + """Counts all the git commits sorted by month and year |
| + |
| + Args: |
| + cc(cursor) |
| + |
| + Return: |
| + results(list): a list of lists e.g. [['1-2014', 20], ['2-2014', 45]] |
| + """ |
| + cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
| + git_commit""") |
| + years = cc.fetchall() |
| + results = [] |
| + for year in years: |
| + for month in xrange(1, 13): |
| + cc.execute("""SELECT COUNT(*) FROM git_commit |
| + WHERE YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" |
| + % (year[0], month)) |
| + result = cc.fetchone() |
| + results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
| + return results |
| + |
| + |
| +def number_of_suspicious_commits(cc): |
| + """Counts the number of commits with no review url or TBRed with no lgtm |
| + sorted by month and year |
| + |
| + Args: |
| + cc(cursor) |
| + |
| + Return: |
| + results(list): a list of lists |
| + """ |
| + cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
| + git_commit AS `timestamp`""") |
| + years = cc.fetchall() |
| + results = [] |
| + for year in years: |
| + for month in xrange(1, 13): |
| + cc.execute("""SELECT COUNT(*) FROM review |
| + INNER JOIN git_commit on review.review_url = git_commit.review_url |
| + INNER JOIN commit_people on commit_people.git_commit_hash = |
| + git_commit.hash |
| + LEFT JOIN (SELECT review_url, COUNT(*) AS c FROM review_people |
| + WHERE type = 'lgtm' GROUP BY review_url) lgtm_count on |
| + review.review_url = lgtm_count.review_url WHERE |
| + lgtm_count.c = 0 OR lgtm_count.c IS NULL AND commit_people.type = |
| + 'tbr' AND YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" % (year[0], |
| + month)) |
| + result = cc.fetchone() |
| + results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
| + return results |
| + |
| + |
| +def number_of_blank_tbrs(cc): |
| + """Counts the number of occurences of TBR= with no reviewer listed |
| + |
| + Args: |
| + cc(cursor) |
| + |
| + Return: |
| + results(list): a list of lists |
| + """ |
| + cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
| + git_commit AS `timestamp`""") |
| + years = cc.fetchall() |
| + results = [] |
| + for year in years: |
| + for month in xrange(1, 13): |
| + cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit |
| + ON commit_people.git_commit_hash = git_commit.hash |
| + WHERE commit_people.people_email_address = 'NOBODY' AND |
| + YEAR(git_commit.timestamp) = %s AND |
| + MONTH(git_commit.timestamp) = %s""" % (year[0], month)) |
| + result = cc.fetchone() |
| + results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
| + return results |
| + |
| + |
| +def number_of_tbrs(cc): |
| + """Counts the number of commits with a TBR |
| + |
| + Args: |
| + cc(cursor) |
| + |
| + Return: |
| + results(list): a list of lists |
| + """ |
| + cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM |
| + git_commit AS `timestamp`""") |
| + years = cc.fetchall() |
| + results = [] |
| + for year in years: |
| + for month in xrange(1, 13): |
| + cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit |
| + ON commit_people.git_commit_hash = git_commit.hash |
| + WHERE commit_people.type = 'tbr' AND |
| + YEAR(git_commit.timestamp) = %s AND |
| + MONTH(git_commit.timestamp) = %s""" % (year[0], month)) |
| + result = cc.fetchone() |
| + results.append(['%s-%s' % (month, year[0]), int(result[0])]) |
| + return results |
| + |
| + |
| +# TODO(ksho): add doc string to function |
| +def get_people_ratio_tbr_total(cc): |
| + cc.execute("""CREATE TEMPORARY TABLE addr_tbr_hash (email VARCHAR(200), tbr |
| + INT, hash VARCHAR(40))""") |
| + cc.execute("""INSERT INTO addr_tbr_hash (email, tbr, hash) |
|
hinoka
2015/07/17 22:57:46
Lets format this with indents, so it looks like
IN
|
| + SELECT commit_people.people_email_address, tbr_count.c, |
| + commit_people.git_commit_hash FROM commit_people |
| + LEFT JOIN (SELECT git_commit_hash, count(*) AS c FROM commit_people |
| + WHERE type='tbr' GROUP BY git_commit_hash) tbr_count |
| + ON commit_people.git_commit_hash = tbr_count.git_commit_hash |
| + WHERE commit_people.type='author'""") |
| + cc.execute("""SELECT email, SUM(CASE WHEN tbr<>0 THEN 1 ELSE 0 END) |
|
hinoka
2015/07/17 22:57:46
I assume you created a new table to do aggregation
|
| + num_tbrd, COUNT(*) AS num_total FROM addr_tbr_hash GROUP BY email""") |
| + # list of [email, num tbr'd commits, num total commits] |
| + people_tbr_data = cc.fetchall() |
| + people_tbr_data = [[data_item[0], int(data_item[1]), int(data_item[2])] |
| + for data_item in people_tbr_data] |
| + cc.execute("""DROP TABLE addr_tbr_hash""") |
| + return people_tbr_data |
| + |
| + |
| +def compute_all_stats(cc): |
| + all_commits_by_date = total_commits(cc) |
| + total_num_commits = totaler(all_commits_by_date) |
| + suspicious_commits_by_date = number_of_suspicious_commits(cc) |
| + all_suspicious_commits = totaler(suspicious_commits_by_date) |
| + ratio_all_to_suspicious = ratio_calculator(suspicious_commits_by_date, |
| + all_commits_by_date) |
| + number_of_blank_tbrs_by_date = number_of_blank_tbrs(cc) |
| + total_blank_tbrs = totaler(number_of_blank_tbrs_by_date) |
| + number_of_tbrs_by_date = number_of_tbrs(cc) |
| + total_num_tbrs = totaler(number_of_tbrs_by_date) |
| + ratio_blank_to_all_tbrs = ratio_calculator(number_of_blank_tbrs_by_date, |
| + number_of_tbrs_by_date) |
| + people_ratio_tbr_total = get_people_ratio_tbr_total(cc) |
| + return all_commits_by_date, total_num_commits, suspicious_commits_by_date, |
|
Sergey Berezin
2015/07/16 15:23:55
nit: consider returning a map rather than a tuple,
|
| + all_suspicious_commits, ratio_all_to_suspicious, |
| + number_of_blank_tbrs_by_date, total_blank_tbrs, |
| + number_of_tbrs_by_date, total_blank_tbrs, number_of_tbrs_by_date, |
| + total_num_tbrs, ratio_blank_to_all_tbrs |