Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Unified Diff: infra/tools/antibody/compute_stats.py

Issue 1235373004: Added script to generate stats on a git checkout (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@new_antibody_db_schema
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | infra/tools/antibody/git_commit_parser.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: infra/tools/antibody/compute_stats.py
diff --git a/infra/tools/antibody/compute_stats.py b/infra/tools/antibody/compute_stats.py
new file mode 100644
index 0000000000000000000000000000000000000000..db0c94dec9abf5991dceae456d611f7a739b4fbf
--- /dev/null
+++ b/infra/tools/antibody/compute_stats.py
@@ -0,0 +1,189 @@
+# Copyright 2015 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import infra.tools.antibody.cloudsql_connect as csql
+
+
+def totaler(counts_by_date):
+ """Totals the values in a list of lists
+
+ Args:
+ counts_by_date(list): a list of two-item lists where the first index is a
+ month and year and the second is a count
+
+ Return:
+ total(int): the sum of the values
+ """
+ total = sum(i[1] for i in counts_by_date)
+ return total
+
+
+def ratio_calculator(numerator, denominator):
+ """Computes the ratio of the counts in two lists
+
+ Args:
+ numerator(list): list of lists with a month-year string as the first index
+ and a count as the second index
+ denominator(list): list of lists with a month-year string as the first index
+ and a count as the second index
+
+ Return:
+ ratios(list): a list of lists with ratios rounded to three decimal places
+ e.g. [['1-2015', .251], ['10-2014', .742]]
Sergey Berezin 2015/07/16 15:23:55 nit: in most other systems, we usually use YYYY-MM
hinoka 2015/07/17 22:57:46 +1
+ """
+ ratios = []
+ for i in numerator:
+ for j in denominator:
+ if i[0] == j[0] and j[1] != 0:
+ ratios.append([i[0], round(float(i[1]) / j[1], 3)])
Sergey Berezin 2015/07/16 15:23:55 nit: beware of floating point representation being
+ break
+ elif i[0] == j[0]:
+ ratios.append([i[0], 0])
+ return ratios
+
+
+def total_commits(cc):
+ """Counts all the git commits sorted by month and year
+
+ Args:
+ cc(cursor)
+
+ Return:
+ results(list): a list of lists e.g. [['1-2014', 20], ['2-2014', 45]]
+ """
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
+ git_commit""")
+ years = cc.fetchall()
+ results = []
+ for year in years:
+ for month in xrange(1, 13):
+ cc.execute("""SELECT COUNT(*) FROM git_commit
+ WHERE YEAR(timestamp) = %s AND MONTH(timestamp) = %s"""
+ % (year[0], month))
+ result = cc.fetchone()
+ results.append(['%s-%s' % (month, year[0]), int(result[0])])
+ return results
+
+
+def number_of_suspicious_commits(cc):
+ """Counts the number of commits with no review url or TBRed with no lgtm
+ sorted by month and year
+
+ Args:
+ cc(cursor)
+
+ Return:
+ results(list): a list of lists
+ """
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
+ git_commit AS `timestamp`""")
+ years = cc.fetchall()
+ results = []
+ for year in years:
+ for month in xrange(1, 13):
+ cc.execute("""SELECT COUNT(*) FROM review
+ INNER JOIN git_commit on review.review_url = git_commit.review_url
+ INNER JOIN commit_people on commit_people.git_commit_hash =
+ git_commit.hash
+ LEFT JOIN (SELECT review_url, COUNT(*) AS c FROM review_people
+ WHERE type = 'lgtm' GROUP BY review_url) lgtm_count on
+ review.review_url = lgtm_count.review_url WHERE
+ lgtm_count.c = 0 OR lgtm_count.c IS NULL AND commit_people.type =
+ 'tbr' AND YEAR(timestamp) = %s AND MONTH(timestamp) = %s""" % (year[0],
+ month))
+ result = cc.fetchone()
+ results.append(['%s-%s' % (month, year[0]), int(result[0])])
+ return results
+
+
+def number_of_blank_tbrs(cc):
+ """Counts the number of occurences of TBR= with no reviewer listed
+
+ Args:
+ cc(cursor)
+
+ Return:
+ results(list): a list of lists
+ """
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
+ git_commit AS `timestamp`""")
+ years = cc.fetchall()
+ results = []
+ for year in years:
+ for month in xrange(1, 13):
+ cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit
+ ON commit_people.git_commit_hash = git_commit.hash
+ WHERE commit_people.people_email_address = 'NOBODY' AND
+ YEAR(git_commit.timestamp) = %s AND
+ MONTH(git_commit.timestamp) = %s""" % (year[0], month))
+ result = cc.fetchone()
+ results.append(['%s-%s' % (month, year[0]), int(result[0])])
+ return results
+
+
+def number_of_tbrs(cc):
+ """Counts the number of commits with a TBR
+
+ Args:
+ cc(cursor)
+
+ Return:
+ results(list): a list of lists
+ """
+ cc.execute("""SELECT DISTINCT DATE_FORMAT(`timestamp`, '%Y') FROM
+ git_commit AS `timestamp`""")
+ years = cc.fetchall()
+ results = []
+ for year in years:
+ for month in xrange(1, 13):
+ cc.execute("""SELECT COUNT(*) FROM commit_people INNER JOIN git_commit
+ ON commit_people.git_commit_hash = git_commit.hash
+ WHERE commit_people.type = 'tbr' AND
+ YEAR(git_commit.timestamp) = %s AND
+ MONTH(git_commit.timestamp) = %s""" % (year[0], month))
+ result = cc.fetchone()
+ results.append(['%s-%s' % (month, year[0]), int(result[0])])
+ return results
+
+
+# TODO(ksho): add doc string to function
+def get_people_ratio_tbr_total(cc):
+ cc.execute("""CREATE TEMPORARY TABLE addr_tbr_hash (email VARCHAR(200), tbr
+ INT, hash VARCHAR(40))""")
+ cc.execute("""INSERT INTO addr_tbr_hash (email, tbr, hash)
hinoka 2015/07/17 22:57:46 Lets format this with indents, so it looks like IN
+ SELECT commit_people.people_email_address, tbr_count.c,
+ commit_people.git_commit_hash FROM commit_people
+ LEFT JOIN (SELECT git_commit_hash, count(*) AS c FROM commit_people
+ WHERE type='tbr' GROUP BY git_commit_hash) tbr_count
+ ON commit_people.git_commit_hash = tbr_count.git_commit_hash
+ WHERE commit_people.type='author'""")
+ cc.execute("""SELECT email, SUM(CASE WHEN tbr<>0 THEN 1 ELSE 0 END)
hinoka 2015/07/17 22:57:46 I assume you created a new table to do aggregation
+ num_tbrd, COUNT(*) AS num_total FROM addr_tbr_hash GROUP BY email""")
+ # list of [email, num tbr'd commits, num total commits]
+ people_tbr_data = cc.fetchall()
+ people_tbr_data = [[data_item[0], int(data_item[1]), int(data_item[2])]
+ for data_item in people_tbr_data]
+ cc.execute("""DROP TABLE addr_tbr_hash""")
+ return people_tbr_data
+
+
+def compute_all_stats(cc):
+ all_commits_by_date = total_commits(cc)
+ total_num_commits = totaler(all_commits_by_date)
+ suspicious_commits_by_date = number_of_suspicious_commits(cc)
+ all_suspicious_commits = totaler(suspicious_commits_by_date)
+ ratio_all_to_suspicious = ratio_calculator(suspicious_commits_by_date,
+ all_commits_by_date)
+ number_of_blank_tbrs_by_date = number_of_blank_tbrs(cc)
+ total_blank_tbrs = totaler(number_of_blank_tbrs_by_date)
+ number_of_tbrs_by_date = number_of_tbrs(cc)
+ total_num_tbrs = totaler(number_of_tbrs_by_date)
+ ratio_blank_to_all_tbrs = ratio_calculator(number_of_blank_tbrs_by_date,
+ number_of_tbrs_by_date)
+ people_ratio_tbr_total = get_people_ratio_tbr_total(cc)
+ return all_commits_by_date, total_num_commits, suspicious_commits_by_date,
Sergey Berezin 2015/07/16 15:23:55 nit: consider returning a map rather than a tuple,
+ all_suspicious_commits, ratio_all_to_suspicious,
+ number_of_blank_tbrs_by_date, total_blank_tbrs,
+ number_of_tbrs_by_date, total_blank_tbrs, number_of_tbrs_by_date,
+ total_num_tbrs, ratio_blank_to_all_tbrs
« no previous file with comments | « no previous file | infra/tools/antibody/git_commit_parser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698