Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(369)

Side by Side Diff: appengine/findit/common/local_git_parsers.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Fix nits. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Parse output of local git commands into Gitiles response format."""
6
7 from collections import defaultdict
8 from datetime import datetime
9 from datetime import timedelta
10 from datetime import time
11 from datetime import tzinfo
12 import re
13
14 from common import repo_util
15 from common.blame import Blame
16 from common.blame import Region
17 from common.change_log import ChangeLog
18 from common.change_log import FileChangeInfo
19
# Header line that opens a group of lines in 'git blame --porcelain' output,
# e.g. 'ed268bfed3205347a90557c5029f37e90cc01956 18 18 3'. Group 1 is the
# commit hash; groups 2 and 3 are the last two numbers (used by
# GitBlameParser as the region start line and line count). Headers without
# the trailing count (continuation lines of a group) do not match.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# 'author <name>' - does not match 'author-mail', 'author-time' or
# 'author-tz' because a space is required right after 'author'.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (\d+)')
AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)')

COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (\d+)')

# Anchored like the other header patterns; every user calls .match(), so the
# anchor documents intent without changing which lines match.
FILENAME_PATTERN = re.compile(r'^filename (\S+)')

COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers that delimit the commit message in the custom 'git log' format.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# This pattern is for M, A, D: a single-character status followed by one path.
CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# This pattern is for R, C: a status with a score (e.g. R078) followed by two
# paths.
CHANGED_FILE_PATTERN2 = re.compile(
    r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Marker that separates consecutive commits in multi-commit 'git log' output.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')

# Maps the first character of a raw-diff status to a change type name.
INITIAL_TO_CHANGE_TYPE = {
    'M': 'modify',
    'A': 'add',
    'D': 'delete',
    'C': 'copy',
    'R': 'rename',
}
53
54
class TimeZoneInfo(tzinfo):
  """Fixed-offset time zone built from an offset string like: +0800."""

  def __init__(self, offset_str):
    """Initializes the time zone.

    Args:
      offset_str (str): UTC offset in '[+-]HHMM' form, e.g. '+0800' or '-0530'.
          It is also kept verbatim as the time zone name.
    """
    super(TimeZoneInfo, self).__init__()
    self._offset = timedelta(minutes=self._ParseOffsetMinutes(offset_str))
    self._tzname = offset_str

  @staticmethod
  def _ParseOffsetMinutes(offset_str):
    """Returns the signed offset in minutes encoded by offset_str.

    Only the last four characters are read (as HHMM) and only the first
    character is checked for a '-' sign.

    Raises:
      ValueError: if the hour or minute digits are not integers.
      IndexError: if offset_str is empty.
    """
    minutes = int(offset_str[-4:-2]) * 60 + int(offset_str[-2:])
    return -minutes if offset_str[0] == '-' else minutes

  def __getinitargs__(self):
    """Returns the constructor arguments used by tzinfo's copy/pickle support.

    Without this, the base tzinfo reduce protocol rebuilds the object by
    calling the class with no arguments, which fails because offset_str is
    required.
    """
    return (self._tzname,)

  def utcoffset(self, dt=None):  # pylint: disable=W0613
    """Returns the fixed offset from UTC as a timedelta; dt is ignored."""
    return self._offset

  def tzname(self, dt=None):  # pylint: disable=W0613
    """Returns the original offset string as the zone name; dt is ignored."""
    return self._tzname

  def dst(self, dt=None):  # pylint: disable=W0613
    """Returns a zero timedelta: a fixed offset has no daylight saving."""
    return timedelta(0)

  def LocalToUTC(self, naive_time):
    """Localizes naive datetime and converts it to utc naive datetime.

    Args:
      naive_time (datetime): naive datetime expressed in this time zone.

    Returns:
      A naive datetime equal to naive_time minus this zone's UTC offset.
    """
    return naive_time - self.utcoffset()
78
79
class GitParser(object):
  """Base class for callables that parse the output of a git command."""

  def __call__(self, output):
    """Parses the output of a git command.

    Args:
      output (str): raw text printed by the git command.

    Raises:
      NotImplementedError: always; subclasses must override this method.
    """
    raise NotImplementedError()
84
85
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
    Git blame output of a Region is:
      ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
      author test@google.com
      author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      author-time 1363032816
      author-tz +0000
      committer test@google.com
      committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      committer-time 1363032816
      committer-tz +0000
      summary add (mac) test for ttcindex in SkFontStream
      previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
      filename src/core/SkFontStream.h
                   * Return the number of shared directories.
      ed268bfed3205347a90557c5029f37e90cc01956 19 19
                   * if the stream is a normal sfnt (ttf). If there is an error or
      ed268bfed3205347a90557c5029f37e90cc01956 20 20
                   * no directory is found, return 0.
  """

  def __call__(self, output):
    """Groups blamed regions by commit and attaches author information.

    Args:
      output (str): text printed by 'git blame --porcelain'.

    Returns:
      A list of parsed Blame objects, one per commit seen in the output.

    Raises:
      KeyError: if a commit has a region but its author name, author email,
          author time or filename line was never seen.
    """
    commit_to_regions = defaultdict(list)
    commit_infos = defaultdict(dict)
    curr_commit = None
    for line in output.splitlines():
      # Sample: ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976e 2 1 7.
      match = REGION_START_COUNT_PATTERN.match(line)
      if match:
        curr_commit = match.group(1)
        commit_to_regions[curr_commit].append(
            Region(int(match.group(2)), int(match.group(3)), curr_commit))
        continue

      # Detail lines are meaningless until the first region header is seen.
      if not curr_commit:
        continue

      info = commit_infos[curr_commit]

      # Sample: author test@google.com.
      match = AUTHOR_NAME_PATTERN.match(line)
      if match:
        info['author_name'] = match.group(1)
        continue

      # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>.
      match = AUTHOR_MAIL_PATTERN.match(line)
      if match:
        info['author_email'] = repo_util.NormalizeEmail(
            match.group(1).replace('<', '').replace('>', ''))
        continue

      # Sample: author-time 1311863160.
      # author-time is seconds since the Unix epoch, which already identifies
      # an instant independent of any time zone, so it is converted straight
      # to a naive UTC datetime (as GitChangeLogParser does). The 'author-tz'
      # line only records the author's local offset and must not be applied
      # on top: doing so on a local-time conversion was only correct when the
      # machine time zone happened to equal the author's.
      match = AUTHOR_TIME_PATTERN.match(line)
      if match:
        info['author_time'] = datetime.utcfromtimestamp(int(match.group(1)))
        continue

      # Sample: filename src/core/SkFontStream.h.
      match = FILENAME_PATTERN.match(line)
      if match:
        info['path'] = match.group(1)

    blames = []
    # items() rather than iteritems() so the loop runs on Python 2 and 3.
    for commit, regions in commit_to_regions.items():
      info = commit_infos[commit]
      for region in regions:
        region.author_name = info['author_name']
        region.author_email = info['author_email']
        region.author_time = info['author_time']
      blames.append(Blame(commit, info['path'], regions))

    return blames
158
159
def GetChangeType(initial):
  """Gets change type based on the initial character.

  Args:
    initial (str): status field of a raw diff line, e.g. 'M' or 'R078'. Only
        its first character is used.

  Returns:
    The change type name from INITIAL_TO_CHANGE_TYPE ('modify', 'add',
    'delete', 'copy' or 'rename'), or None if the status is empty or its
    first character is not a known status.
  """
  # Slicing instead of indexing so an empty status yields None rather than
  # raising IndexError.
  return INITIAL_TO_CHANGE_TYPE.get(initial[:1])
164
165
def GetFileChangeInfo(change_type, path1, path2):
  """Builds a FileChangeInfo with old/new paths set for the change type.

  Args:
    change_type (str): 'modify', 'add', 'delete', 'rename' or 'copy'; compared
        case-insensitively. May be None or empty.
    path1 (str): first path on the raw diff line.
    path2 (str): second path on the raw diff line; only used for rename and
        copy.

  Returns:
    A FileChangeInfo constructed with (change type, old path, new path), or
    None if change_type is empty or not one of the supported types.
  """
  # GetChangeType returns None for unknown statuses; bail out before lower().
  if not change_type:
    return None

  change_type = change_type.lower()

  if change_type == 'modify':
    return FileChangeInfo(change_type, path1, path1)

  if change_type == 'add':
    # Stay the same as gitiles.
    return FileChangeInfo(change_type, None, path1)

  if change_type == 'delete':
    return FileChangeInfo(change_type, path1, None)

  if change_type in ('rename', 'copy'):
    return FileChangeInfo(change_type, path1, path2)

  return None
182
183
class GitChangeLogParser(GitParser):
  """Parses 'git log' output for a single commit into a ChangeLog."""

  @staticmethod
  def _ToUTCDatetime(timestamp_str):
    """Converts a string of epoch seconds into a naive UTC datetime."""
    return datetime.utcfromtimestamp(int(timestamp_str))

  def __call__(self, output):
    """Parses output of 'git log --pretty=format:<format>'.

    For example:
      Git changelog output is:
        commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
        tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
        parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

        author Test
        author-mail test@google.com
        author-time 1468442226

        committer Test
        committer-mail test@google.com
        committer-time 1468442226

        --Message start--
        Commit messages...
        --Message end--

        :100644 100644 25f95f c766f1 M      src/a/delta/git_parsers.py

    Args:
      output (str): text printed by 'git log' for one commit.

    Returns:
      Parsed ChangeLog object, or None if no 'commit <hash>' line was parsed.
    """
    # Single-value header lines: (pattern, ChangeLog attribute, converter).
    # A converter of None stores the captured text unchanged.
    header_fields = (
        (COMMIT_HASH_PATTERN, 'revision', None),
        (AUTHOR_NAME_PATTERN, 'author_name', None),
        (AUTHOR_MAIL_PATTERN, 'author_email', repo_util.NormalizeEmail),
        (AUTHOR_TIME_PATTERN, 'author_time', self._ToUTCDatetime),
        (COMMITTER_NAME_PATTERN, 'committer_name', None),
        (COMMITTER_MAIL_PATTERN, 'committer_email', repo_util.NormalizeEmail),
        (COMMITTER_TIME_PATTERN, 'committer_time', self._ToUTCDatetime),
    )

    is_message_line = False
    change_log = ChangeLog(message='', touched_files=[])
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
        continue

      if MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Delete the last '\n' added when producing message end pattern.
        change_log.message = change_log.message[:-1]
        continue

      if is_message_line:
        change_log.message += line + '\n'
        continue

      for pattern, attribute, convert in header_fields:
        match = pattern.match(line)
        if match:
          value = match.group(1)
          setattr(change_log, attribute, convert(value) if convert else value)
          break
      else:
        # Not a header line, so check whether it describes a touched file.
        # For modify, add, delete, the pattern is like:
        #   :100644 100644 df565d 6593e M  modules/audio_coding/BUILD.gn
        # For rename, copy, the pattern is like:
        #   :100644 100644 3f2e 20a5 R078 path1 path2
        match = (CHANGED_FILE_PATTERN1.match(line) or
                 CHANGED_FILE_PATTERN2.match(line))
        if match:
          # Only the rename/copy pattern has a 7th group (the second path).
          second_path = match.group(7) if len(match.groups()) >= 7 else None
          # NOTE(review): GetFileChangeInfo returns None for statuses missing
          # from INITIAL_TO_CHANGE_TYPE, so None can end up in touched_files -
          # confirm consumers tolerate that.
          change_log.touched_files.append(
              GetFileChangeInfo(GetChangeType(match.group(5)),
                                match.group(6),
                                second_path))

    # If commit is not parsed, the changelog will be {'author': {}, 'committer':
    # {}, 'message': ''}, return None instead.
    if not change_log.revision:
      return None

    change_log.commit_position, change_log.code_review_url = (
        repo_util.ExtractCommitPositionAndCodeReviewUrl(change_log.message))
    change_log.reverted_revision = repo_util.GetRevertedRevision(
        change_log.message)

    return change_log
271
272
class GitChangeLogsParser(GitParser):
  """Parses 'git log' output covering several commits into ChangeLogs."""

  @staticmethod
  def _ParseAndCollect(parser, changelog_lines, changelogs):
    """Parses one commit's lines and appends the result if it is truthy."""
    change_log = parser(''.join(line + '\n' for line in changelog_lines))
    if change_log:
      changelogs.append(change_log)

  def __call__(self, output):
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    For example:
      The output is:
        **Changelog start**
        commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
        tree 27b0421273ed4aea25e497c6d26d9c7db6481852
        parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

        author author1
        author-mail author1@chromium.org
        author-time 1464864938

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 1464865033

        --Message start--
        Message 1
        --Message end--

        :100644 100644 28e117 f12d3 M      tools/win32.txt


        **Changelog start**
        commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
        tree d22d3786e135b83183cfeba5f3d8913959f56299
        parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

        author author2
        author-mail author2@chromium.org
        author-time 1464864783

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 1464864854

        --Message start--
        Message2
        --Message end--

        :100644 100644 7280f df186 M      tools/perf/benchmarks/memory_infra.py

    Args:
      output (str): text printed by 'git log' for a range of commits, with
          each commit preceded by a '**Changelog start**' line.

    Returns:
      A list of parsed ChangeLog objects, in the order they appear in output.
    """
    git_changelog_parser = GitChangeLogParser()

    changelogs = []
    # Lines of the commit currently being accumulated; collected in a list and
    # joined once per commit instead of growing a string line by line.
    pending_lines = []
    for line in output.splitlines():
      if not CHANGELOG_START_PATTERN.match(line):
        pending_lines.append(line)
        continue

      # A start marker with nothing accumulated yet (e.g. the very first one)
      # has no preceding commit to flush.
      if not pending_lines:
        continue

      self._ParseAndCollect(git_changelog_parser, pending_lines, changelogs)
      pending_lines = []

    # Flush the last commit, which has no start marker after it.
    self._ParseAndCollect(git_changelog_parser, pending_lines, changelogs)

    return changelogs
343
344
class GitDiffParser(GitParser):
  """Pass-through parser for diff text: returns the git output unchanged."""

  def __call__(self, output):
    """Returns the raw text output of 'git log --format="" --max-count=1'.

    For example:
      The output is like:

      diff --git a/chrome/print_header.js b/chrome/print_header.js
      index 51f25e7..4eec37f 100644
      --- a/chrome/browser/resources/print_preview/print_header.js
      +++ b/chrome/browser/resources/print_preview/print_header.js
      @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
             var html;
             var label;
             if (numPages != numSheets) {
      -        html = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                       '<b>' + numSheets + '</b>',
      -                                       '<b>' + summaryLabel + '</b>',
      -                                       numPages,
      -                                       pagesLabel);
      +        html = loadTimeData.getStringF(
      +            'printPreviewSummaryFormatLong',
      +            '<b>' + numSheets.toLocaleString() + '</b>',
      +            '<b>' + summaryLabel + '</b>',
      +            numPages.toLocaleString(),
      +            pagesLabel);
               label = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                        numSheets, summaryLabel,
      -                                        numPages, pagesLabel);
      +                                        numSheets.toLocaleString(),
      +                                        summaryLabel,
      +                                        numPages.toLocaleString(),
      +                                        pagesLabel);
             } else {
      -        html = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      -                                       '<b>' + numSheets + '</b>',
      -                                       '<b>' + summaryLabel + '</b>');
      +        html = loadTimeData.getStringF(
      +            'printPreviewSummaryFormatShort',
      +            '<b>' + numSheets.toLocaleString() + '</b>',
      +            '<b>' + summaryLabel + '</b>');
               label = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      -                                        numSheets, summaryLabel);
      +                                        numSheets.toLocaleString(),
      +                                        summaryLabel);
             }

    Args:
      output (str): raw diff text printed by git.

    Returns:
      The output argument, unmodified.
    """
    return output
394
395
class GitSourceParser(GitParser):
  """Pass-through parser for file content: returns the git output unchanged."""

  def __call__(self, output):
    """Returns the raw text of a file source from 'git show <rev>:<file>'.

    Args:
      output (str): file content printed by git.

    Returns:
      The output argument, unmodified.
    """
    return output
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698