Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(542)

Side by Side Diff: appengine/findit/common/local_git_parsers.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Fix nits. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Parse output of local git commands into Gitiles response format."""
6
7 from collections import defaultdict
8 from datetime import datetime
9 import re
10
11 from common import repo_util
12 from common.blame import Blame
13 from common.blame import Region
14 from common.change_log import ChangeLog
15
# Header line that opens a blame region in porcelain output, e.g.
# 'ed268bf... 18 18 3'. Captures the commit hash plus the third and fourth
# fields; the second field is matched but not captured. Continuation lines
# that carry only three fields (e.g. 'ed268bf... 19 19') do not match.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# strptime format used for the author-time / committer-time values.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# 'author <name>', 'author-mail <address>' and 'author-time <timestamp>'.
# The literal space after the keyword keeps 'author' from matching the
# 'author-mail' / 'author-time' lines.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (.+)')

# Same three header lines for the committer.
COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (.+)')

# 'filename <path>'; the path is captured up to the first whitespace.
FILENAME_PATTERN = re.compile(r'filename (\S+)')

# 'commit <hash>'. Does not match 'committer ...' because a space must
# directly follow 'commit'.
COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers that delimit the commit message inside a changelog.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# This pattern is for M, A, D: a one-character status followed by one path.
# Groups: old mode, new mode, old hash, new hash, status, path.
CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# This pattern is for R, C: a status with a score (e.g. 'R078') followed by
# two paths. Groups: old mode, new mode, old hash, new hash, status, path1,
# path2.
CHANGED_FILE_PATTERN2 = re.compile(
    r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Marker that precedes each changelog when several are concatenated.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')

# Maps the first character of a file status to the change type name.
INITIAL_TO_CHANGE_TYPE = {
    'M': 'modify',
    'A': 'add',
    'D': 'delete',
    'C': 'copy',
    'R': 'rename'
}
50
51
class GitParser(object):
  """Base class for callables that parse the output of a git command.

  Subclasses override __call__ to turn the raw command output into a parsed
  result; calling the base class directly always raises.
  """

  def __call__(self, output):
    """Parses ``output``.

    Args:
      output: Raw text output of a git command.

    Raises:
      NotImplementedError: Always; subclasses must provide the parsing.
    """
    raise NotImplementedError()
56
57
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
    Git blame output of a Region is:
      ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
      author test@google.com
      author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      author-time 2016-06-02 10:54:14
      committer test@google.com
      committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      committer-time 2016-06-02 10:54:14
      summary add (mac) test for ttcindex in SkFontStream
      previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
      filename src/core/SkFontStream.h
               * Return the number of shared directories.
      ed268bfed3205347a90557c5029f37e90cc01956 19 19
               * if the stream is a normal sfnt (ttf). If there is an error or
      ed268bfed3205347a90557c5029f37e90cc01956 20 20
               * no directory is found, return 0.
  """

  # The signature intentionally takes more arguments than GitParser.__call__.
  # pylint: disable=arguments-differ
  def __call__(self, output, path, revision):
    """Builds a Blame for ``path`` at ``revision`` from porcelain output.

    Args:
      output: Text output of the git blame command.
      path: File path the blame was computed for; passed to Blame.
      revision: Revision the blame was computed at; passed to Blame.

    Returns:
      A Blame object with one Region appended per region header line, each
      carrying author_name, author_email and author_time.

    Raises:
      KeyError: If a region's commit never had an author, author-mail or
        author-time line in ``output``.
    """
    blame = Blame(revision, path)
    # Author details are keyed by commit because they are recorded once per
    # commit and then applied to every region of that commit below.
    commit_infos = defaultdict(dict)
    curr_commit = None
    for line in output.splitlines():
      # Sample: ec3ed6... 2 1 7.
      region_match = REGION_START_COUNT_PATTERN.match(line)
      if region_match:
        curr_commit = region_match.group(1)
        blame.append(Region(int(region_match.group(2)),
                            int(region_match.group(3)),
                            curr_commit))
        continue

      # Header lines are only meaningful once a region header has been seen.
      if not curr_commit:
        continue

      # Sample: author test@google.com.
      name_match = AUTHOR_NAME_PATTERN.match(line)
      if name_match:
        commit_infos[curr_commit]['author_name'] = name_match.group(1)
        continue

      # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>.
      mail_match = AUTHOR_MAIL_PATTERN.match(line)
      if mail_match:
        # Drop the angle brackets before normalizing the address.
        raw_email = mail_match.group(1).replace('<', '').replace('>', '')
        commit_infos[curr_commit]['author_email'] = (
            repo_util.NormalizeEmail(raw_email))
        continue

      # Sample: author-time 2016-06-02 10:54:14.
      time_match = AUTHOR_TIME_PATTERN.match(line)
      if time_match:
        commit_infos[curr_commit]['author_time'] = datetime.strptime(
            time_match.group(1), DATETIME_FORMAT)
        continue

      # Sample: filename src/core/SkFontStream.h.
      # NOTE(review): 'path' is recorded but not read anywhere in this method.
      filename_match = FILENAME_PATTERN.match(line)
      if filename_match:
        commit_infos[curr_commit]['path'] = filename_match.group(1)

    for region in blame:
      author_info = commit_infos[region.revision]
      region.author_name = author_info['author_name']
      region.author_email = author_info['author_email']
      region.author_time = author_info['author_time']

    return blame
120
121
def GetChangeType(initial):
  """Gets the change type name for a git file status.

  Args:
    initial: Status string such as 'M' or 'R078'; only its first character
      is looked up in INITIAL_TO_CHANGE_TYPE.

  Returns:
    The change type name (e.g. 'modify'), or None when the status is empty
    or its first character is not a known status.
  """
  # Slicing instead of indexing so that an empty status yields None rather
  # than raising IndexError.
  return INITIAL_TO_CHANGE_TYPE.get(initial[:1])
125
126
def GetFileChangeInfo(change_type, path1, path2):
  """Builds the change type and old/new path record for one touched file.

  Args:
    change_type: Change type name, compared case-insensitively against
      'modify', 'add', 'delete', 'rename' and 'copy'. May be None or empty
      when the status was not recognized.
    path1: The only path for modify/add/delete; the old path for
      rename/copy.
    path2: The new path for rename/copy; ignored otherwise.

  Returns:
    A dict with 'change_type' (as passed in), 'old_path' and 'new_path', or
    None when ``change_type`` is missing or not one of the known types.
  """
  # An unrecognized status reaches here as None; report "no info" instead of
  # failing on None.lower().
  if not change_type:
    return None

  kind = change_type.lower()
  if kind == 'modify':
    old_path, new_path = path1, path1
  elif kind == 'add':
    # Stay the same as Gitiles.
    old_path, new_path = None, path1
  elif kind == 'delete':
    old_path, new_path = path1, None
  elif kind in ('rename', 'copy'):
    old_path, new_path = path1, path2
  else:
    return None

  return {
      'change_type': change_type,
      'old_path': old_path,
      'new_path': new_path
  }
159
160
class GitChangeLogParser(GitParser):
  """Parses the changelog text of a single commit."""

  # The signature intentionally takes more arguments than GitParser.__call__.
  # pylint: disable=arguments-differ
  def __call__(self, output, repo_url):
    """Parses output of 'git log --pretty=format:<format>'.

    For example:
      Git changelog output is:
        commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
        tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
        parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

        author Test
        author-mail test@google.com
        author-time 2016-10-24 22:21:45

        committer Test
        committer-mail test@google.com
        committer-time 2016-10-24 22:25:45

        --Message start--
        Commit messages...
        --Message end--

        :100644 100644 25f95f c766f1 M      src/a/delta/git_parsers.py

    Args:
      output: Text output of the git log command for one commit.
      repo_url: Repository url, used to build the commit url.

    Returns:
      Parsed ChangeLog object, or None if ``output`` has no 'commit <hash>'
      line.
    """
    def ParseTime(value):
      return datetime.strptime(value, DATETIME_FORMAT)

    def Identity(value):
      return value

    # (info key, pattern, converter applied to the captured value).
    header_parsers = (
        ('revision', COMMIT_HASH_PATTERN, Identity),
        ('author_name', AUTHOR_NAME_PATTERN, Identity),
        ('author_email', AUTHOR_MAIL_PATTERN, repo_util.NormalizeEmail),
        ('author_time', AUTHOR_TIME_PATTERN, ParseTime),
        ('committer_name', COMMITTER_NAME_PATTERN, Identity),
        ('committer_email', COMMITTER_MAIL_PATTERN,
         repo_util.NormalizeEmail),
        ('committer_time', COMMITTER_TIME_PATTERN, ParseTime),
    )

    info = {'message': '', 'touched_files': []}
    message_lines = []
    is_message_line = False
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
        continue

      if MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Joining (rather than appending '\n' per line) leaves no trailing
        # newline to strip.
        info['message'] = '\n'.join(message_lines)
        continue

      # Inside the message markers every line is message text, even if it
      # looks like a header or a file change.
      if is_message_line:
        message_lines.append(line)
        continue

      header_matched = False
      for key, pattern, convert in header_parsers:
        header_match = pattern.match(line)
        if header_match:
          info[key] = convert(header_match.group(1))
          header_matched = True
          break
      if header_matched:
        continue

      # For modify, add, delete, the pattern is like:
      # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn
      # For rename, copy, the pattern is like:
      # :100644 100644 3f2e 20a5 R078 path1 path2
      file_match = (CHANGED_FILE_PATTERN1.match(line) or
                    CHANGED_FILE_PATTERN2.match(line))
      if not file_match:
        continue

      status = file_match.group(5)
      change_type = GetChangeType(status) if status else None
      if change_type is None:
        # Status letter outside INITIAL_TO_CHANGE_TYPE: skip the entry rather
        # than failing the whole parse.
        continue

      # Only the rename/copy pattern has a seventh group (the new path).
      has_second_path = len(file_match.groups()) >= 7
      info['touched_files'].append(
          GetFileChangeInfo(change_type,
                            file_match.group(6),
                            file_match.group(7) if has_second_path else None))

    if is_message_line:
      # The end marker never showed up; keep whatever message was collected.
      info['message'] = '\n'.join(message_lines)

    # Without a parsed 'commit <hash>' line there is no changelog to build,
    # return None instead.
    if 'revision' not in info:
      return None

    info['commit_position'], info['code_review_url'] = (
        repo_util.ExtractCommitPositionAndCodeReviewUrl(info['message']))
    info['reverted_revision'] = repo_util.GetRevertedRevision(info['message'])
    info['commit_url'] = '%s/+/%s' % (repo_url, info['revision'])

    return ChangeLog.FromDict(info)
250
251
class GitChangeLogsParser(GitParser):
  """Parses the concatenated changelogs of a range of commits."""

  # The signature intentionally takes more arguments than GitParser.__call__.
  # pylint: disable=arguments-differ
  def __call__(self, output, repo_url):
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    For example:
      The output is:
        **Changelog start**
        commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
        tree 27b0421273ed4aea25e497c6d26d9c7db6481852
        parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

        author author1
        author-mail author1@chromium.org
        author-time 2016-10-24 22:21:45

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 2016-10-24 22:23:45

        --Message start--
        Message 1
        --Message end--

        :100644 100644 28e117 f12d3 M      tools/win32.txt


        **Changelog start**
        commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
        tree d22d3786e135b83183cfeba5f3d8913959f56299
        parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

        author author2
        author-mail author2@chromium.org
        author-time 2016-10-24 22:22:45

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 2016-10-24 22:23:45

        --Message start--
        Message2
        --Message end--

        :100644 100644 7280f df186 M      tools/perf/benchmarks/memory_infra.py

    Args:
      output: Text output of the git log command for a range of commits.
      repo_url: Repository url, forwarded to GitChangeLogParser.

    Returns:
      A list of parsed ChangeLog objects, in the order they appear in
      ``output``. Sections that do not parse into a changelog are left out.
    """
    git_changelog_parser = GitChangeLogParser()

    # Split the output into per-changelog groups of lines. A start marker
    # closes the group collected so far; a marker seen while nothing has been
    # collected is simply skipped.
    sections = []
    current_lines = []
    for line in output.splitlines():
      if CHANGELOG_START_PATTERN.match(line):
        if current_lines:
          sections.append(current_lines)
          current_lines = []
      else:
        current_lines.append(line)
    # Whatever follows the last marker is the final section.
    sections.append(current_lines)

    changelogs = []
    for section_lines in sections:
      changelog_str = ''.join(
          section_line + '\n' for section_line in section_lines)
      change_log = git_changelog_parser(changelog_str, repo_url)
      if change_log:
        changelogs.append(change_log)

    return changelogs
324
325
class GitDiffParser(GitParser):
  """Pass-through parser for diff output."""

  def __call__(self, output):
    """Returns the raw text output of 'git log --format="" --max-count=1'.

    The text is returned unchanged; no parsing is performed.

    For example:
      The output is like:

      diff --git a/chrome/print_header.js b/chrome/print_header.js
      index 51f25e7..4eec37f 100644
      --- a/chrome/browser/resources/print_preview/print_header.js
      +++ b/chrome/browser/resources/print_preview/print_header.js
      @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
             var html;
             var label;
             if (numPages != numSheets) {
      -        html = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                       '<b>' + numSheets + '</b>',
      -                                       '<b>' + summaryLabel + '</b>',
      -                                       numPages,
      -                                       pagesLabel);
      +        html = loadTimeData.getStringF(
      +            'printPreviewSummaryFormatLong',
      +            '<b>' + numSheets.toLocaleString() + '</b>',
      +            '<b>' + summaryLabel + '</b>',
      +            numPages.toLocaleString(),
      +            pagesLabel);
               label = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                        numSheets, summaryLabel,
      -                                        numPages, pagesLabel);
      +                                        numSheets.toLocaleString(),
      +                                        summaryLabel,
      +                                        numPages.toLocaleString(),
      +                                        pagesLabel);
             } else {
      -        html = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      -                                       '<b>' + numSheets + '</b>',
      -                                       '<b>' + summaryLabel + '</b>');
      +        html = loadTimeData.getStringF(
      +            'printPreviewSummaryFormatShort',
      +            '<b>' + numSheets.toLocaleString() + '</b>',
      +            '<b>' + summaryLabel + '</b>');
               label = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      -                                        numSheets, summaryLabel);
      +                                        numSheets.toLocaleString(),
      +                                        summaryLabel);
             }

    Args:
      output: Text output of the git command.

    Returns:
      ``output`` itself.
    """
    return output
375
376
class GitSourceParser(GitParser):
  """Pass-through parser for file source output."""

  def __call__(self, output):
    """Returns the raw text of a file source from 'git show <rev>:<file>'.

    Args:
      output: Text output of the git show command.

    Returns:
      ``output`` itself; no parsing is performed.
    """
    return output
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698