OLD | NEW |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import datetime | 5 import datetime |
6 import dateutil.parser | 6 import dateutil.parser |
7 import pytz | 7 import pytz |
8 import re | 8 import re |
9 import subprocess | 9 import subprocess |
10 | 10 |
11 import infra.tools.antibody.cloudsql_connect as csql | 11 import infra.tools.antibody.cloudsql_connect as csql |
12 | 12 |
# Timestamp taken once, when this module is imported; it is the time value
# stored in every row built by get_features_for_commit_people.
# NOTE(review): datetime.now() without a tz argument yields a naive local
# time, while commit timestamps in this file are converted to UTC -- confirm
# that mixing the two is intended.
curr_time = datetime.datetime.now()
14 | 14 |
15 | 15 |
def read_commit_info(git_checkout_path, commits_after_date,
                     git_log_format=('%H', '%b', '%ae',
                                     '%ci', '%f')):  # pragma: no cover
  """Runs git log on master and returns its raw output

  Every commit is emitted as its fields joined by the 0x1f byte and is
  terminated by the 0x1e byte.

  Args:
    git_checkout_path(str): path to a local git checkout, used as the working
                            directory of the git command
    commits_after_date: value interpolated into git log --after=
    git_log_format(tuple): formatting directives passed to git log --format,
                           one per field

  Return:
    log(str): output of git log
  """
  field_separator = '%x1f'
  record_terminator = '%x1e'
  format_arg = '--format=%s' % (
      field_separator.join(git_log_format) + record_terminator)
  after_arg = '--after=%s' % commits_after_date
  command = ['git', 'log', 'master', format_arg, after_arg]
  return subprocess.check_output(command, cwd=git_checkout_path)
32 | 33 |
33 | 34 |
def parse_commit_info(git_log,
                      git_commit_fields=('id', 'body', 'author',
                                         'timestamp', 'subject')):
  """Separates the various parts of git commit messages

  Args:
    git_log(str): git commits as
                  --format='%H%x1f%b%x1f%ae%x1f%ci%x1f%f%x1e', i.e. fields
                  separated by 0x1f and commits terminated by 0x1e
    git_commit_fields(tuple): labels for the different components of the
                              commit messages corresponding to the --format

  Return:
    git_log_dict(list): list of dictionaries each corresponding to the parsed
                        components of a single commit message; empty when
                        git_log contains no commits
  """
  # Trim only ASCII whitespace around each record. A bare strip() on a text
  # string also removes the 0x1f field separator, which would silently drop
  # an empty trailing field (e.g. an empty subject).
  ascii_whitespace = ' \t\n\r\x0b\x0c'
  git_log_dict = []
  for record in git_log.strip('\n\x1e').split('\x1e'):
    record = record.strip(ascii_whitespace)
    if not record:
      # An empty log would otherwise yield a bogus commit holding only an
      # empty 'id'.
      continue
    git_log_dict.append(dict(zip(git_commit_fields, record.split('\x1f'))))
  return git_log_dict
52 | 53 |
53 | 54 |
# Accepted spellings of a BUG= line, tried in this order. Group 1 is always
# the numeric bug id. Regex metacharacters that occur literally in the urls
# ('.' and '?') are escaped so that real urls actually match.
_BUG_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'^BUG=https?://code\.google\.com/p/(?:chromium|rietveld)'
    r'/issues/detail\?id=(\d+)',
    r'^BUG=https?://crbug\.com/(\d+)',
    r'^BUG=chromium:(\d+)',
    r'^BUG=(\d+)',
))


def get_bug_url(git_line):
  """Extracts the bug number from a BUG= line of a commit message

  Arg:
    git_line(str): a single line of a commit message

  Return:
    bug_url(str): the digits of the first bug referenced on the line, or None
                  if the line does not start with a recognised BUG= form
  """
  for pattern in _BUG_PATTERNS:
    bug_match = pattern.match(git_line)
    if bug_match:
      return bug_match.group(1)
  return None
64 | 65 |
65 | 66 |
def get_tbr(git_line):
  """Extracts the TBR reviewers from a TBR= line of a commit message

  Arg:
    git_line(str): a single line of a commit message

  Return:
    tbr(list): the part before the '@' of every comma separated reviewer,
               ['NOBODY'] if the line is a TBR= line that names no reviewer,
               or None if the line is not a TBR= line
  """
  if not git_line.startswith('TBR='):
    return None
  names = [entry.split('@')[0].strip() for entry in git_line[4:].split(',')]
  # Blank entries (trailing commas, whitespace-only values) are not people.
  names = [name for name in names if name]
  return names or ['NOBODY']
72 | 76 |
73 | 77 |
| 78 # TODO(keelerh): scan all review urls in a commit and compare the diffs to |
| 79 # identify the correct one |
# Labels that introduce a review url in a commit message.
_REVIEW_URL_PREFIXES = ('Review:', 'Review URL:', 'Code review URL:')


def get_review_url(git_line):
  """Extracts the review url from a review line of a commit message

  Arg:
    git_line(str): a single line of a commit message

  Return:
    review_url(str): the text following a 'Review:', 'Review URL:' or
                     'Code review URL:' label with surrounding whitespace
                     removed, or None if the line carries no such label or
                     nothing follows it
  """
  for prefix in _REVIEW_URL_PREFIXES:
    if git_line.startswith(prefix):
      # Slice by the real label length instead of assuming exactly one space
      # after the colon, so the url never loses its first character.
      return git_line[len(prefix):].strip() or None
  return None
83 | 89 |
84 | 90 |
def get_features_for_git_commit(git_commit):
  """Builds the feature tuple for a single parsed commit

  Arg:
    git_commit(dict): a commit message parsed into a dictionary; the 'id',
                      'timestamp', 'subject' and 'body' keys are read

  Return:
    (tuple): (hash, bug number, timestamp, review url, None, subject) where
             timestamp is formatted as 'YYYY-MM-DD HH:MM:SS' in UTC and the
             bug number and review url are None when no body line supplies
             them
  """
  commit_hash = git_commit['id']
  # The parsed value is timezone aware; normalise it to UTC before the
  # offset is dropped by the formatting below.
  utc_time = dateutil.parser.parse(git_commit['timestamp'])
  utc_time = utc_time.astimezone(pytz.UTC)
  formatted_time = utc_time.strftime('%Y-%m-%d %H:%M:%S')
  commit_subject = git_commit['subject']

  found_bug = None
  found_review = None
  for body_line in git_commit['body'].split('\n'):
    # A later matching line overrides an earlier one.
    candidate_bug = get_bug_url(body_line)
    if candidate_bug:
      found_bug = candidate_bug
    candidate_review = get_review_url(body_line)
    if candidate_review:
      found_review = candidate_review
  return (commit_hash, found_bug, formatted_time, found_review, None,
          commit_subject)
103 | 110 |
104 | 111 |
def get_features_for_commit_people(git_commit):
  """Builds the people rows associated with a single parsed commit

  Arg:
    git_commit(dict): a commit message parsed into a dictionary; the 'id',
                      'author' and 'body' keys are read

  Return:
    (list): one (name, hash, curr_time, 'author') tuple for the author,
            followed by one (name, hash, curr_time, 'tbr') tuple for every
            reviewer named on the last TBR= line of the body; author names
            are cut at the first '@'
  """
  commit_hash = git_commit['id']
  author_name = git_commit['author'].split('@')[0]
  rows = [(author_name, commit_hash, curr_time, 'author')]

  reviewers = None
  for body_line in git_commit['body'].split('\n'):
    parsed = get_tbr(body_line)
    if parsed:
      # A later TBR= line replaces the reviewers of an earlier one.
      reviewers = parsed
  if reviewers is not None:
    rows.extend((name, commit_hash, curr_time, 'tbr') for name in reviewers)
  return rows
124 | 131 |
125 | 132 |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
174 | 181 |
175 | 182 |
def get_urls_from_git_commit(cc):  # pragma: no cover
  """Queries the Cloud SQL instance for the review urls of the stored
     commits that have a TBR

  Arg:
    cc: a cursor for the Cloud SQL connection

  Return:
    (list): the review_url column of every author row whose commit has at
            least one 'tbr' row and a non-NULL review url
  """
  query = """SELECT git_commit.review_url,
      commit_people.people_email_address, commit_people.type
      FROM commit_people
      INNER JOIN (
        SELECT git_commit_hash, COUNT(*)
        AS c
        FROM commit_people
        WHERE type='tbr'
        GROUP BY git_commit_hash) tbr_count
      ON commit_people.git_commit_hash = tbr_count.git_commit_hash
      INNER JOIN git_commit
      ON commit_people.git_commit_hash = git_commit.hash
      WHERE tbr_count.c <> 0
      AND git_commit.review_url IS NOT NULL
      AND commit_people.type='author'"""
  cc.execute(query)
  # Only the first selected column (the review url) is handed back.
  return [row[0] for row in cc.fetchall()]
OLD | NEW |