Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: appengine/findit/common/local_git_parsers.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Fix nits. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Parse output of local git commands into Gitiles response format."""
6
7 from collections import defaultdict
8 from datetime import datetime
9 from datetime import time
10 from datetime import timedelta
11 import re
12
13 from common import repo_util
14 from common import time_util
15 from common.blame import Blame
16 from common.blame import Region
17 from common.change_log import ChangeLog
18 from common.change_log import FileChangeInfo
19
# --- 'git blame --porcelain' lines -------------------------------------------

# Header that opens a group of blamed lines, e.g. 'ed268bfe... 18 18 3'.
# Captures the commit hash plus the third and fourth fields; the second field
# is matched but not captured.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# Author fields, shared by the blame parser and the changelog parser.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (\d+)')
AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)')

# Committer fields.
COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (\d+)')

# Path of the blamed file, e.g. 'filename src/core/SkFontStream.h'.
FILENAME_PATTERN = re.compile(r'filename (\S+)')

# --- 'git log --pretty=format:<format>' lines ---------------------------------

COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers that bracket the commit message in the formatted log output.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# Changed-file line with a one-letter status and a single path (M, A, D), e.g.
# ':100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn'.
CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# Changed-file line with a status code and two paths (R, C), e.g.
# ':100644 100644 3f2e 20a5 R078 path1 path2'.
CHANGED_FILE_PATTERN2 = re.compile(
    r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Separator that precedes every commit when several changelogs are printed.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')

# Maps the first character of a git status code to the change type name.
INITIAL_TO_CHANGE_TYPE = {
    'M': 'modify',
    'A': 'add',
    'D': 'delete',
    'C': 'copy',
    'R': 'rename',
}
53
54
class GitParser(object):
  """Base class for callables that parse the output of a git command."""

  def __call__(self, output):
    """Parses ``output``; every concrete parser must override this.

    Args:
      output: The text produced by a git command.

    Raises:
      NotImplementedError: Always, in this base class.
    """
    raise NotImplementedError()
59
60
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
  Git blame output of a Region is:
    ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
    author test@google.com
    author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
    author-time 1363032816
    author-tz +0000
    committer test@google.com
    committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
    committer-time 1363032816
    committer-tz +0000
    summary add (mac) test for ttcindex in SkFontStream
    previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
    filename src/core/SkFontStream.h
         * Return the number of shared directories.
    ed268bfed3205347a90557c5029f37e90cc01956 19 19
         * if the stream is a normal sfnt (ttf). If there is an error or
    ed268bfed3205347a90557c5029f37e90cc01956 20 20
         * no directory is found, return 0.
  """

  def __call__(self, output):
    """Groups the blamed regions by commit and builds one Blame per commit.

    Args:
      output: Text printed by 'git blame --porcelain'.

    Returns:
      A list of parsed Blame objects.

    Raises:
      KeyError: If a commit that owns a region never reported its author
        name, mail, time or filename, or if an 'author-tz' line shows up
        before the 'author-time' line of the same commit.
    """
    commit_to_regions = defaultdict(list)
    commit_infos = defaultdict(dict)
    curr_commit = None
    for line in output.splitlines():
      # Sample: ec3ed6... 2 1 7.
      region_match = REGION_START_COUNT_PATTERN.match(line)
      if region_match:
        curr_commit = region_match.group(1)
        commit_to_regions[curr_commit].append(
            Region(int(region_match.group(2)),
                   int(region_match.group(3)),
                   curr_commit))
        continue

      # Metadata lines are only meaningful once a region header has been seen.
      if not curr_commit:
        continue

      info = commit_infos[curr_commit]

      # Sample: author test@google.com.
      match = AUTHOR_NAME_PATTERN.match(line)
      if match:
        info['author_name'] = match.group(1)
        continue

      # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>.
      match = AUTHOR_MAIL_PATTERN.match(line)
      if match:
        info['author_email'] = repo_util.NormalizeEmail(
            match.group(1).replace('<', '').replace('>', ''))
        continue

      # Sample: author-time 1311863160.
      match = AUTHOR_TIME_PATTERN.match(line)
      if match:
        # Kept as the raw string; it is converted when author-tz is parsed.
        info['author_time'] = match.group(1)
        continue

      # Sample: author-tz +0800.
      match = AUTHOR_TIMEZONE_PATTERN.match(line)
      if match:
        time_zone = time_util.TimeZoneInfo(match.group(1))
        # NOTE(review): datetime.fromtimestamp() interprets the epoch in the
        # local time zone of the machine running this code, so the result
        # depends on the host configuration -- confirm this is what
        # LocalToUTC() expects.
        info['author_time'] = time_zone.LocalToUTC(
            datetime.fromtimestamp(int(info['author_time'])))
        continue

      match = FILENAME_PATTERN.match(line)
      if match:
        info['path'] = match.group(1)

    blames = []
    # items() rather than iteritems() so the loop runs on Python 2 and 3.
    for commit, regions in commit_to_regions.items():
      info = commit_infos[commit]
      for region in regions:
        region.author_name = info['author_name']
        region.author_email = info['author_email']
        region.author_time = info['author_time']
      blames.append(Blame(commit, info['path'], regions))

    return blames
134
135
def GetChangeType(initial):
  """Gets the change type based on the initial character of a status code.

  Args:
    initial: A git status code such as 'M' or 'R078'. Only its first
      character is looked up.

  Returns:
    The matching value of INITIAL_TO_CHANGE_TYPE, or None when ``initial`` is
    empty/None or its first character is not a key of that mapping.
  """
  # An empty status would make initial[0] raise IndexError.
  if not initial:
    return None
  return INITIAL_TO_CHANGE_TYPE.get(initial[0])
139
140
def GetFileChangeInfo(change_type, path1, path2):
  """Builds the FileChangeInfo for one changed-file line.

  Args:
    change_type: Change type name, compared case-insensitively against
      'modify', 'add', 'delete', 'rename' and 'copy'. May be None.
    path1: The first (or only) path of the changed-file line.
    path2: The second path of the line; only used for 'rename' and 'copy'.

  Returns:
    A FileChangeInfo built from ``change_type`` and two path arguments:
      modify:        (path1, path1)
      add:           (None, path1)
      delete:        (path1, None)
      rename / copy: (path1, path2)
    or None when ``change_type`` is empty/None or not one of the above.
  """
  # GetChangeType() yields None for statuses it does not know; calling
  # .lower() on that would raise AttributeError.
  if not change_type:
    return None

  kind = change_type.lower()

  if kind == 'modify':
    return FileChangeInfo(change_type, path1, path1)

  if kind == 'add':
    # Stay the same as gitiles.
    return FileChangeInfo(change_type, None, path1)

  if kind == 'delete':
    return FileChangeInfo(change_type, path1, None)

  if kind in ('rename', 'copy'):
    return FileChangeInfo(change_type, path1, path2)

  return None
157
158
class GitChangeLogParser(GitParser):
  """Parses the formatted 'git log' output of a single commit."""

  def __call__(self, output):
    """Parses output of 'git log --pretty=format:<format>'.

    For example:
    Git changelog output is:
      commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
      tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
      parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

      author Test
      author-mail test@google.com
      author-time 1468442226

      committer Test
      committer-mail test@google.com
      committer-time 1468442226

      --Message start--
      Commit messages...
      --Message end--

      :100644 100644 25f95f c766f1 M src/a/delta/git_parsers.py

    Args:
      output: Text printed by 'git log' for one commit.

    Returns:
      Parsed ChangeLog object, or None if no 'commit <hash>' line was found.
    """

    def _ToUtcDatetime(epoch_seconds):
      return datetime.utcfromtimestamp(int(epoch_seconds))

    # (pattern, ChangeLog attribute, converter or None to keep the raw text).
    # Every line is tried against these in order; the first match wins.
    header_fields = (
        (COMMIT_HASH_PATTERN, 'revision', None),
        (AUTHOR_NAME_PATTERN, 'author_name', None),
        (AUTHOR_MAIL_PATTERN, 'author_email', repo_util.NormalizeEmail),
        (AUTHOR_TIME_PATTERN, 'author_time', _ToUtcDatetime),
        (COMMITTER_NAME_PATTERN, 'committer_name', None),
        (COMMITTER_MAIL_PATTERN, 'committer_email', repo_util.NormalizeEmail),
        (COMMITTER_TIME_PATTERN, 'committer_time', _ToUtcDatetime),
    )

    is_message_line = False
    change_log = ChangeLog(message='', touched_files=[])
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
        continue

      if MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Delete the last '\n' added when producing message end pattern.
        change_log.message = change_log.message[:-1]
        continue

      if is_message_line:
        change_log.message += line + '\n'
        continue

      for pattern, attribute, convert in header_fields:
        match = pattern.match(line)
        if match:
          value = match.group(1)
          setattr(change_log, attribute, convert(value) if convert else value)
          break
      else:
        # No header field matched, so the line may describe a changed file.
        # For modify, add, delete, the pattern is like:
        # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn
        # For rename, copy, the pattern is like:
        # :100644 100644 3f2e 20a5 R078 path1 path2
        match = (CHANGED_FILE_PATTERN1.match(line) or
                 CHANGED_FILE_PATTERN2.match(line))
        if not match:
          continue

        status = match.group(5)
        change_type = GetChangeType(status) if status else None
        if change_type is None:
          # A status outside INITIAL_TO_CHANGE_TYPE cannot be turned into a
          # FileChangeInfo; skip the line instead of crashing on it.
          continue

        # Only the two-path pattern has a seventh group.
        second_path = match.group(7) if len(match.groups()) >= 7 else None
        file_change_info = GetFileChangeInfo(
            change_type, match.group(6), second_path)
        if file_change_info is not None:
          change_log.touched_files.append(file_change_info)

    # Without a parsed commit hash the changelog carries no useful data, so
    # return None instead.
    if not change_log.revision:
      return None

    change_log.commit_position, change_log.code_review_url = (
        repo_util.ExtractCommitPositionAndCodeReviewUrl(change_log.message))
    change_log.reverted_revision = repo_util.GetRevertedRevision(
        change_log.message)

    return change_log
246
247
class GitChangeLogsParser(GitParser):
  """Parses formatted 'git log' output that covers several commits."""

  def __call__(self, output):
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    The output is cut at every '**Changelog start**' line and each piece is
    handed to GitChangeLogParser.

    For example:
    The output is:
      **Changelog start**
      commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
      tree 27b0421273ed4aea25e497c6d26d9c7db6481852
      parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

      author author1
      author-mail author1@chromium.org
      author-time 1464864938

      committer Commit bot
      committer-mail commit-bot@chromium.org
      committer-time 1464865033

      --Message start--
      Message 1
      --Message end--

      :100644 100644 28e117 f12d3 M tools/win32.txt


      **Changelog start**
      commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
      tree d22d3786e135b83183cfeba5f3d8913959f56299
      parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

      author author2
      author-mail author2@chromium.org
      author-time 1464864783

      committer Commit bot
      committer-mail commit-bot@chromium.org
      committer-time 1464864854

      --Message start--
      Message2
      --Message end--

      :100644 100644 7280f df186 M tools/perf/benchmarks/memory_infra.py

    Returns:
      A list of parsed ChangeLog objects.
    """
    parse_single = GitChangeLogParser()
    changelogs = []
    pending = []  # Newline-terminated lines of the commit being collected.

    def _Flush():
      # Parse what has been collected so far and start a fresh piece.
      parsed = parse_single(''.join(pending))
      if parsed:
        changelogs.append(parsed)
      del pending[:]

    for line in output.splitlines():
      if not CHANGELOG_START_PATTERN.match(line):
        pending.append(line + '\n')
      elif pending:
        # A separator closes the previous commit; a separator with nothing
        # collected before it has nothing to close.
        _Flush()

    # The last commit is not followed by a separator.
    _Flush()

    return changelogs
318
319
class GitDiffParser(GitParser):
  """Pass-through parser for diff text."""

  def __call__(self, output):
    """Returns the raw text output of 'git log --format="" --max-count=1'.

    The text is handed back untouched.

    For example:
    The output is like:

      diff --git a/chrome/print_header.js b/chrome/print_header.js
      index 51f25e7..4eec37f 100644
      --- a/chrome/browser/resources/print_preview/print_header.js
      +++ b/chrome/browser/resources/print_preview/print_header.js
      @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
           var html;
           var label;
           if (numPages != numSheets) {
      -      html = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                     '<b>' + numSheets + '</b>',
      -                                     '<b>' + summaryLabel + '</b>',
      -                                     numPages,
      -                                     pagesLabel);
      +      html = loadTimeData.getStringF(
      +          'printPreviewSummaryFormatLong',
      +          '<b>' + numSheets.toLocaleString() + '</b>',
      +          '<b>' + summaryLabel + '</b>',
      +          numPages.toLocaleString(),
      +          pagesLabel);
           ...
           }

    Args:
      output: The diff text printed by git.

    Returns:
      ``output`` itself.
    """
    return output
369
370
class GitSourceParser(GitParser):
  """Pass-through parser for the content of a file at a revision."""

  def __call__(self, output):
    """Returns the raw text of a file source from 'git show <rev>:<file>'.

    Args:
      output: The file content printed by git.

    Returns:
      ``output`` itself.
    """
    return output
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698