Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: appengine/findit/util_scripts/git_checkout/local_git_parsers.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Rebase. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Parse output of local git commands into Gitiles response format."""
6
7 from collections import defaultdict
8 from datetime import datetime
9 import re
10
11 from lib import time_util
12 from lib.gitiles import commit_util
13 from lib.gitiles.blame import Blame
14 from lib.gitiles.blame import Region
15 from lib.gitiles.change_log import ChangeLog
16 from lib.gitiles.diff import ChangeType
17
# Header line that opens a group of lines in 'git blame --porcelain' output,
# e.g. 'ec3ed6... 2 1 7'. Captures the revision (1st field), the 3rd field and
# the 4th field; the 2nd field is matched but not captured.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# strptime format of the 'author-time'/'committer-time' values in the
# changelog output, e.g. '2016-10-24 22:21:45'.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Author fields. The trailing space in the name pattern keeps it from matching
# the 'author-mail', 'author-time' and 'author-tz' lines.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (.+)')
AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)')

# Committer fields, same layout as the author fields.
COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (.+)')

# 'commit <hash>' line; the trailing space keeps it from matching 'committer'.
COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers that delimit the commit message inside the changelog output.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# Changed-file line with a one-letter status and a single path (M, A, D), e.g.
# ':100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn'.
CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# Changed-file line with a status plus score and two paths (R, C), e.g.
# ':100644 100644 3f2e 20a5 R078 path1 path2'.
CHANGED_FILE_PATTERN2 = re.compile(
    r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Marker line that precedes each changelog when several are printed at once.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')
43
# Maps the first letter of a changed-file status (e.g. 'M', or the 'R' of
# 'R078') to the matching ChangeType value. Letters not listed here have no
# mapping.
INITIAL_TO_CHANGE_TYPE = {
    'M': ChangeType.MODIFY,
    'A': ChangeType.ADD,
    'D': ChangeType.DELETE,
    'C': ChangeType.COPY,
    'R': ChangeType.RENAME,
}
51
52
class GitParser(object):
  """Base class of the callable parsers for local git command output."""

  def __call__(self, output):
    """Parses the output of a git command; subclasses must override this.

    Args:
      output: Output of the git command to parse.

    Raises:
      NotImplementedError: Always, since the base class parses nothing.
    """
    raise NotImplementedError()
57
58
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
    Git blame output of a Region is:
      ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
      author test@google.com
      author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      author-time 1363032816
      author-tz +0000
      committer test@google.com
      committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      committer-time 1363032816
      committer-tz +0000
      summary add (mac) test for ttcindex in SkFontStream
      previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
      filename src/core/SkFontStream.h
           * Return the number of shared directories.
      ed268bfed3205347a90557c5029f37e90cc01956 19 19
           * if the stream is a normal sfnt (ttf). If there is an error or
      ed268bfed3205347a90557c5029f37e90cc01956 20 20
           * no directory is found, return 0.
  """

  def __call__(self, output, path, revision):  # pylint:disable=W
    """Builds a Blame for a file from the porcelain blame output.

    Args:
      output: Text output of 'git blame --porcelain <rev> <file_path>'.
      path: File path, passed to Blame.
      revision: Revision, passed to Blame.

    Returns:
      A Blame with one Region per 4-field header line found in the output, or
      None if the output is empty or the resulting Blame is falsy.
    """
    if not output:
      return None

    blame = Blame(revision, path)
    # Author info is keyed by revision because it is looked up again for every
    # later region that belongs to the same revision.
    commit_info = defaultdict(dict)
    region_info = None
    for line in output.splitlines():
      # Sample: ec3ed6... 2 1 7.
      match = REGION_START_COUNT_PATTERN.match(line)
      if match:
        # A new region starts, so the previous one (if any) is complete.
        if region_info:
          blame.AddRegion(self._BuildRegion(region_info, commit_info))

        region_info = {'start': int(match.group(2)),
                       'count': int(match.group(3)),
                       'revision': match.group(1)}
      elif region_info:
        self._UpdateAuthorInfo(line, commit_info[region_info['revision']])

    # Flush the last region, which has no following header line.
    if region_info:
      blame.AddRegion(self._BuildRegion(region_info, commit_info))

    return blame if blame else None

  @staticmethod
  def _BuildRegion(region_info, commit_info):
    """Creates a Region from a region header and its revision's author info."""
    author_info = commit_info[region_info['revision']]
    return Region(region_info['start'],
                  region_info['count'],
                  region_info['revision'],
                  author_info['author_name'],
                  author_info['author_email'],
                  author_info['author_time'])

  @staticmethod
  def _UpdateAuthorInfo(line, author_info):
    """Stores the author field carried by a line into author_info, if any."""
    # Sample: author test@google.com.
    match = AUTHOR_NAME_PATTERN.match(line)
    if match:
      author_info['author_name'] = match.group(1)
      return

    # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>.
    match = AUTHOR_MAIL_PATTERN.match(line)
    if match:
      author_info['author_email'] = commit_util.NormalizeEmail(
          match.group(1).replace('<', '').replace('>', ''))
      return

    # Sample: author-time 1311863160.
    match = AUTHOR_TIME_PATTERN.match(line)
    if match:
      # Kept as the raw string until the 'author-tz' line converts it.
      author_info['author_time'] = match.group(1)
      return

    # Sample: author-tz +0800.
    match = AUTHOR_TIMEZONE_PATTERN.match(line)
    if match:
      time_zone = time_util.TimeZoneInfo(match.group(1))
      # NOTE(review): datetime.fromtimestamp interprets the epoch value in the
      # timezone of the machine running this code - confirm that this is what
      # LocalToUTC expects.
      author_info['author_time'] = time_zone.LocalToUTC(
          datetime.fromtimestamp(int(author_info['author_time'])))
141
142
def GetChangeType(initial):
  """Maps a changed-file status to its change type.

  Args:
    initial: Non-empty status string such as 'M' or 'R078'; only its first
      character is looked at.

  Returns:
    The INITIAL_TO_CHANGE_TYPE entry for the first character, or None if that
    character has no entry.
  """
  status_letter = initial[0]
  return INITIAL_TO_CHANGE_TYPE.get(status_letter)
146
147
def GetFileChangeInfo(change_type, path1, path2):
  """Builds the change info dict of one touched file.

  Args:
    change_type: Change type string, compared case-insensitively against the
      ChangeType values. May be None or empty when the status of the file is
      not recognized.
    path1: The only path of a modified/added/deleted file, or the old path of
      a renamed/copied file.
    path2: The new path of a renamed/copied file; unused otherwise.

  Returns:
    A dict with 'change_type', 'old_path' and 'new_path' keys, or None if
    change_type is missing or is not one of the handled change types.
  """
  # GetChangeType returns None for statuses it does not know; calling
  # .lower() on that would raise AttributeError.
  if not change_type:
    return None

  normalized_type = change_type.lower()
  if normalized_type == ChangeType.MODIFY:
    old_path, new_path = path1, path1
  elif normalized_type == ChangeType.ADD:
    # Stay the same as Gitiles: an added file has no old path.
    old_path, new_path = None, path1
  elif normalized_type == ChangeType.DELETE:
    old_path, new_path = path1, None
  elif normalized_type in (ChangeType.RENAME, ChangeType.COPY):
    old_path, new_path = path1, path2
  else:
    return None

  return {
      'change_type': change_type,
      'old_path': old_path,
      'new_path': new_path
  }
181
182
class GitChangeLogParser(GitParser):
  """Parses the 'git log' output of a single commit into a ChangeLog."""

  def __call__(self, output, repo_url):  # pylint:disable=W
    """Parses output of 'git log --pretty=format:<format>'.

    For example:
      Git changelog output is:
        commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
        tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
        parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

        author Test
        author-mail test@google.com
        author-time 2016-10-24 22:21:45

        committer Test
        committer-mail test@google.com
        committer-time 2016-10-24 22:25:45

        --Message start--
        Commit messages...
        --Message end--

        :100644 100644 25f95f c766f1 M src/a/delta/git_parsers.py

    Args:
      output: Text output of the git log command for one commit.
      repo_url: Url of the repository, used to build the 'commit_url'.

    Returns:
      Parsed ChangeLog object, or None if the output is empty or contains no
      'commit <hash>' line.
    """
    if not output:
      return None

    is_message_line = False
    info = {'message': '', 'touched_files': []}
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
        continue

      if MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Remove the added '\n' at the end.
        info['message'] = info['message'][:-1]
        continue

      # Message lines are taken verbatim, even if they look like a field.
      if is_message_line:
        info['message'] += line + '\n'
        continue

      match = COMMIT_HASH_PATTERN.match(line)
      if match:
        info['revision'] = match.group(1)
        continue

      match = AUTHOR_NAME_PATTERN.match(line)
      if match:
        info['author_name'] = match.group(1)
        continue

      match = AUTHOR_MAIL_PATTERN.match(line)
      if match:
        info['author_email'] = commit_util.NormalizeEmail(match.group(1))
        continue

      match = AUTHOR_TIME_PATTERN.match(line)
      if match:
        info['author_time'] = datetime.strptime(
            match.group(1), DATETIME_FORMAT)
        continue

      match = COMMITTER_NAME_PATTERN.match(line)
      if match:
        info['committer_name'] = match.group(1)
        continue

      match = COMMITTER_MAIL_PATTERN.match(line)
      if match:
        info['committer_email'] = commit_util.NormalizeEmail(match.group(1))
        continue

      match = COMMITTER_TIME_PATTERN.match(line)
      if match:
        info['committer_time'] = datetime.strptime(
            match.group(1), DATETIME_FORMAT)
        continue

      # For modify, add, delete, the pattern is like:
      # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn
      # For rename, copy, the pattern is like:
      # :100644 100644 3f2e 20a5 R078 path1 path2
      match = (CHANGED_FILE_PATTERN1.match(line) or
               CHANGED_FILE_PATTERN2.match(line))
      if not match:
        continue

      status = match.group(5)
      change_type = GetChangeType(status) if status else None
      if change_type is None:
        # The status is empty or has no known change type (e.g. 'T'); there
        # is nothing to build a file change info from, so skip the entry
        # instead of crashing on it.
        continue

      # Only the rename/copy pattern has a 7th group (the new path).
      new_path = match.group(7) if len(match.groups()) >= 7 else None
      info['touched_files'].append(
          GetFileChangeInfo(change_type, match.group(6), new_path))

    # If no commit line was parsed, info holds no revision and there is no
    # changelog to build, return None instead.
    if 'revision' not in info:
      return None

    info['commit_position'], info['code_review_url'] = (
        commit_util.ExtractCommitPositionAndCodeReviewUrl(info['message']))
    info['reverted_revision'] = commit_util.GetRevertedRevision(
        info['message'])
    info['commit_url'] = '%s/+/%s' % (repo_url, info['revision'])

    return ChangeLog.FromDict(info)
274
275
class GitChangeLogsParser(GitParser):
  """Parses 'git log' output that holds several changelogs."""

  def __call__(self, output, repo_url):  # pylint:disable=W
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    For example:
      The output is:
        **Changelog start**
        commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
        tree 27b0421273ed4aea25e497c6d26d9c7db6481852
        parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

        author author1
        author-mail author1@chromium.org
        author-time 2016-10-24 22:21:45

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 2016-10-24 22:23:45

        --Message start--
        Message 1
        --Message end--

        :100644 100644 28e117 f12d3 M tools/win32.txt


        **Changelog start**
        commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
        tree d22d3786e135b83183cfeba5f3d8913959f56299
        parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

        author author2
        author-mail author2@chromium.org
        author-time 2016-10-24 22:22:45

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 2016-10-24 22:23:45

        --Message start--
        Message2
        --Message end--

        :100644 100644 7280f df186 M tools/perf/benchmarks/memory_infra.py

    Args:
      output: Text output of the git log command.
      repo_url: Url of the repository, handed to the single changelog parser.

    Returns:
      A list of parsed ChangeLog objects, or None if the output is empty.
    """
    if not output:
      return None

    parse_single_changelog = GitChangeLogParser()

    parsed_changelogs = []
    # Lines (each with its '\n' restored) of the changelog being collected.
    pending_lines = []
    for line in output.splitlines():
      if not CHANGELOG_START_PATTERN.match(line):
        pending_lines.append(line + '\n')
        continue

      # A start marker closes the changelog collected so far, if there is one.
      if pending_lines:
        parsed = parse_single_changelog(''.join(pending_lines), repo_url)
        if parsed:
          parsed_changelogs.append(parsed)
        pending_lines = []

    # The last changelog has no start marker after it, parse what is left.
    parsed = parse_single_changelog(''.join(pending_lines), repo_url)
    if parsed:
      parsed_changelogs.append(parsed)

    return parsed_changelogs
349
350
class GitDiffParser(GitParser):
  """Passes the raw diff text through, mapping empty output to None."""

  def __call__(self, output):
    """Returns the raw text output of 'git log --format="" --max-count=1'.

    For example:
      The output is like:

        diff --git a/chrome/print_header.js b/chrome/print_header.js
        index 51f25e7..4eec37f 100644
        --- a/chrome/browser/resources/print_preview/print_header.js
        +++ b/chrome/browser/resources/print_preview/print_header.js
        @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
         var html;
         var label;
         if (numPages != numSheets) {
        -  html = loadTimeData.getStringF('printPreviewSummaryFormatLong',
        -                                 '<b>' + numSheets + '</b>',
        +  html = loadTimeData.getStringF(
        +      'printPreviewSummaryFormatLong',
        +      '<b>' + numSheets.toLocaleString() + '</b>',
         ...
         }

    Args:
      output: Text output of the git command.

    Returns:
      The output unchanged, or None if it is empty.
    """
    return output or None
OLDNEW
« no previous file with comments | « appengine/findit/util_scripts/git_checkout/__init__.py ('k') | appengine/findit/util_scripts/git_checkout/test/__init__.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698