Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2432)

Side by Side Diff: infra/tools/antibody/git_commit_parser.py

Issue 1235373004: Added script to generate stats on a git checkout (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@new_antibody_db_schema
Patch Set: Rebase Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « infra/tools/antibody/compute_stats.py ('k') | infra/tools/antibody/static/antibody_ui_all.css » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import datetime 5 import datetime
6 import dateutil.parser 6 import dateutil.parser
7 import pytz 7 import pytz
8 import re 8 import re
9 import subprocess 9 import subprocess
10 10
11 import infra.tools.antibody.cloudsql_connect as csql 11 import infra.tools.antibody.cloudsql_connect as csql
12 12
13 curr_time = datetime.datetime.now() 13 curr_time = datetime.datetime.now()
14 14
15 15
16 def read_commit_info(git_checkout_path, commits_after_date, 16 def read_commit_info(git_checkout_path, commits_after_date,
17 git_log_format=('%H', '%b', '%ae', 17 git_log_format=('%H', '%b', '%ae',
18 '%ci')): # pragma: no cover 18 '%ci', '%f')): # pragma: no cover
19 """Read commit messages and other information 19 """Read commit messages and other information
20 20
21 Args: 21 Args:
22 git_checkout_path(str): path to a local git checkout 22 git_checkout_path(str): path to a local git checkout
23 git_log_format(str): formatting directives passed to git log --format 23 git_log_format(str): formatting directives passed to git log --format
24
24 Return: 25 Return:
25 log(str): output of git log 26 log(str): output of git log
26 """ 27 """
27 git_log_format = '%x1f'.join(git_log_format) + '%x1e' 28 git_log_format = '%x1f'.join(git_log_format) + '%x1e'
28 log = subprocess.check_output(['git', 'log', 29 log = subprocess.check_output(['git', 'log', 'master',
29 '--format=%s' % git_log_format, '--after=%s' % commits_after_date], 30 '--format=%s' % git_log_format, '--after=%s' % commits_after_date],
30 cwd=git_checkout_path) 31 cwd=git_checkout_path)
31 return log 32 return log
32 33
33 34
34 def parse_commit_info(git_log, 35 def parse_commit_info(git_log,
35 git_commit_fields=('id', 'body', 'author', 36 git_commit_fields=('id', 'body', 'author',
36 'timestamp')): 37 'timestamp', 'subject')):
37 """Separates the various parts of git commit messages 38 """Separates the various parts of git commit messages
38 39
39 Args: 40 Args:
40 git_log(str): git commits as --format='%H%x1f%b%x1f%ae%x1f%ci%x1e' 41 git_log(str): git commits as --format='%H%x1f%b%x1f%ae%x1f%ci%x1f%f%x1e'
41 git_commit_fields(tuple): labels for the different components of the 42 git_commit_fields(tuple): labels for the different components of the
42 commit messages corresponding to the --format 43 commit messages corresponding to the --format
43 44
44 Return: 45 Return:
45 git_log_dict(list): list of dictionaries each corresponding to the parsed 46 git_log_dict(list): list of dictionaries each corresponding to the parsed
46 components of a single commit message 47 components of a single commit message
47 """ 48 """
48 git_log_cmds = git_log.strip('\n\x1e').split("\x1e") 49 git_log_cmds = git_log.strip('\n\x1e').split("\x1e")
49 git_log_rows = [row.strip().split("\x1f") for row in git_log_cmds] 50 git_log_rows = [row.strip().split("\x1f") for row in git_log_cmds]
50 git_log_dict = [dict(zip(git_commit_fields, row)) for row in git_log_rows] 51 git_log_dict = [dict(zip(git_commit_fields, row)) for row in git_log_rows]
51 return git_log_dict 52 return git_log_dict
52 53
53 54
54 def get_bug_url(git_line): 55 def get_bug_url(git_line):
55 bug_url = None 56 bug_url = None
56 bug_match = (re.match(r'^BUG=https?://code.google.com/p/(?:chromium' 57 bug_match = (re.match(r'^BUG=https?://code.google.com/p/(?:chromium'
57 '|rietveld)/issues/detail?id=(\d+)', git_line) 58 '|rietveld)/issues/detail?id=(\d+)', git_line)
58 or re.match(r'^BUG=https?://crbug.com/(\d+)', git_line) 59 or re.match(r'^BUG=https?://crbug.com/(\d+)', git_line)
59 or re.match(r'^BUG=chromium:(\d+)', git_line) 60 or re.match(r'^BUG=chromium:(\d+)', git_line)
60 or re.match(r'^BUG=(\d+)', git_line)) 61 or re.match(r'^BUG=(\d+)', git_line))
61 if bug_match: 62 if bug_match:
62 bug_url = bug_match.group(1) 63 bug_url = bug_match.group(1)
63 return bug_url 64 return bug_url
64 65
65 66
66 def get_tbr(git_line): 67 def get_tbr(git_line):
67 tbr = None 68 tbr = None
68 if git_line.startswith('TBR=') and len(git_line) > 4: 69 if git_line.startswith('TBR='):
69 tbr = git_line[4:] 70 if len(git_line) > 4:
70 tbr = [x.strip() for x in tbr.split(',')] 71 tbr = git_line[4:]
72 tbr = [x.strip().split('@')[0] for x in tbr.split(',')]
73 else:
74 tbr = ['NOBODY']
71 return tbr 75 return tbr
72 76
73 77
78 # TODO(keelerh): scan all review urls in a commit and compare the diffs to
79 # identify the correct one
74 def get_review_url(git_line): 80 def get_review_url(git_line):
75 review_url = None 81 review_url = None
76 if re.match(r'^Review:.+$', git_line): 82 if re.match(r'^Review:.+$', git_line):
77 review_url = git_line[8:] 83 review_url = git_line[8:]
78 elif re.match(r'^Review URL:.+$', git_line): 84 elif re.match(r'^Review URL:.+$', git_line):
79 review_url = git_line[12:] 85 review_url = git_line[12:]
80 elif re.match(r'^Code review URL:.+$', git_line): 86 elif re.match(r'^Code review URL:.+$', git_line):
81 review_url = git_line[17:] 87 review_url = git_line[17:]
82 return review_url 88 return review_url
83 89
84 90
85 def get_features_for_git_commit(git_commit): 91 def get_features_for_git_commit(git_commit):
86 """Retrieves the git commit features 92 """Retrieves the git commit features
87 93
88 Arg: 94 Arg:
89 git_commit(dict): a commit message parsed into a dictionary 95 git_commit(dict): a commit message parsed into a dictionary
90 96
91 Return: 97 Return:
92 (tuple): relevant features extracted from the commit message 98 (tuple): relevant features extracted from the commit message
93 """ 99 """
94 git_hash = git_commit['id'] 100 git_hash = git_commit['id']
95 dt = dateutil.parser.parse(git_commit['timestamp']).astimezone(pytz.UTC) 101 dt = dateutil.parser.parse(git_commit['timestamp']).astimezone(pytz.UTC)
96 # dt is a datetime object with timezone info 102 # dt is a datetime object with timezone info
97 timestamp = dt.strftime('%Y-%m-%d %H:%M:%S') 103 timestamp = dt.strftime('%Y-%m-%d %H:%M:%S')
104 subject = git_commit['subject']
98 bug_url, review_URL = None, None 105 bug_url, review_URL = None, None
99 for line in git_commit['body'].split('\n'): 106 for line in git_commit['body'].split('\n'):
100 bug_url = get_bug_url(line) or bug_url 107 bug_url = get_bug_url(line) or bug_url
101 review_URL = get_review_url(line) or review_URL 108 review_URL = get_review_url(line) or review_URL
102 return (git_hash, bug_url, timestamp, review_URL, None) 109 return (git_hash, bug_url, timestamp, review_URL, None, subject)
103 110
104 111
105 def get_features_for_commit_people(git_commit): 112 def get_features_for_commit_people(git_commit):
106 """Retrieves the people associated with a git commit 113 """Retrieves the people associated with a git commit
107 114
108 Arg: 115 Arg:
109 git_commit(dict): a commit message parsed into a dictionary 116 git_commit(dict): a commit message parsed into a dictionary
110 117
111 Return: 118 Return:
112 (tuple): relevant people and type extracted from the commit 119 (tuple): relevant people and type extracted from the commit
113 """ 120 """
114 git_hash = git_commit['id'] 121 git_hash = git_commit['id']
115 author = git_commit['author'] 122 author = git_commit['author'].split('@')[0]
116 people_rows = [(author, git_hash, curr_time, 'author')] 123 people_rows = [(author, git_hash, curr_time, 'author')]
117 TBR = None 124 TBR = None
118 for line in git_commit['body'].split('\n'): 125 for line in git_commit['body'].split('\n'):
119 TBR = get_tbr(line) or TBR 126 TBR = get_tbr(line) or TBR
120 if TBR is not None: 127 if TBR is not None:
121 for person in TBR: 128 for person in TBR:
122 people_rows.append((person, git_hash, curr_time, 'tbr')) 129 people_rows.append((person, git_hash, curr_time, 'tbr'))
123 return people_rows 130 return people_rows
124 131
125 132
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
174 181
175 182
176 def get_urls_from_git_commit(cc): # pragma: no cover 183 def get_urls_from_git_commit(cc): # pragma: no cover
177 """Accesses Cloud SQL instance to find the review urls of the stored 184 """Accesses Cloud SQL instance to find the review urls of the stored
178 commits that have a TBR 185 commits that have a TBR
179 186
180 Arg: 187 Arg:
181 cc: a cursor for the Cloud SQL connection 188 cc: a cursor for the Cloud SQL connection
182 189
183 Return: 190 Return:
184 commits_with_review_urls(list): all the commits in the db w/ TBR 191 commits_with_review_urls(list): all the commits in the db w/ a TBR
185 and have review urls 192 and a review url
186 """ 193 """
187 cc.execute("""SELECT git_commit.review_url, 194 cc.execute("""SELECT git_commit.review_url,
188 commit_people.people_email_address, commit_people.type 195 commit_people.people_email_address, commit_people.type
189 FROM commit_people INNER JOIN (SELECT git_commit_hash, 196 FROM commit_people
190 count(*) as c FROM commit_people WHERE type='tbr' 197 INNER JOIN (
191 GROUP BY git_commit_hash) tbr_count 198 SELECT git_commit_hash, COUNT(*)
199 AS c
200 FROM commit_people
201 WHERE type='tbr'
202 GROUP BY git_commit_hash) tbr_count
192 ON commit_people.git_commit_hash = tbr_count.git_commit_hash 203 ON commit_people.git_commit_hash = tbr_count.git_commit_hash
193 INNER JOIN git_commit 204 INNER JOIN git_commit
194 ON commit_people.git_commit_hash = git_commit.hash 205 ON commit_people.git_commit_hash = git_commit.hash
195 WHERE tbr_count.c <> 0 206 WHERE tbr_count.c <> 0
196 AND git_commit.review_url IS NOT NULL 207 AND git_commit.review_url IS NOT NULL
197 AND commit_people.type='author'""") 208 AND commit_people.type='author'""")
198 commits_with_review_urls = cc.fetchall() 209 commits_with_review_urls = cc.fetchall()
199 return [x[0] for x in commits_with_review_urls] 210 return [x[0] for x in commits_with_review_urls]
OLDNEW
« no previous file with comments | « infra/tools/antibody/compute_stats.py ('k') | infra/tools/antibody/static/antibody_ui_all.css » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698