Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Side by Side Diff: appengine/findit/util_scripts/git_checkout/local_git_parsers.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Fix nits. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """Parse output of local git commands into Gitiles response format."""
6
7 from collections import defaultdict
8 from datetime import datetime
9 import re
10
11 from lib import time_util
12 from lib.gitiles import commit_util
13 from lib.gitiles.blame import Blame
14 from lib.gitiles.blame import Region
15 from lib.gitiles.change_log import ChangeLog
16
# Header line of a blame region in 'git blame --porcelain' output:
#   <revision> <original line> <final start line> <number of lines>
# Captures the revision, the final start line and the line count.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# Format used to parse the 'author-time' / 'committer-time' values of the
# changelog output, e.g. '2016-10-24 22:21:45'.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Author fields. The trailing space in the name pattern keeps it from matching
# the 'author-mail', 'author-time' and 'author-tz' lines.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (.+)')
AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)')

# Committer fields.
COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (.+)')

# The trailing space keeps this from matching 'committer ...' lines.
COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers delimiting the commit message in the changelog output.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# This pattern is for M, A, D.
CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# This pattern is for R, C.
CHANGED_FILE_PATTERN2 = re.compile(
    r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Marker separating consecutive changelogs in multi-commit output.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')
42
# Maps the first letter of a git status field (e.g. 'M', or the 'R' of 'R078')
# to the change type name used in the parsed file-change info.
INITIAL_TO_CHANGE_TYPE = {
    'M': 'modify',
    'A': 'add',
    'D': 'delete',
    'C': 'copy',
    'R': 'rename',
}
50
51
class GitParser(object):
  """Base class of the parsers for local git command output.

  Subclasses override __call__ to turn the raw text output of a git command
  into a parsed result.
  """

  def __call__(self, output):
    """Parses ``output``; must be implemented by subclasses.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()
56
57
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
    Git blame output of a Region is:
      ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
      author test@google.com
      author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      author-time 1363032816
      author-tz +0000
      committer test@google.com
      committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      committer-time 1363032816
      committer-tz +0000
      summary add (mac) test for ttcindex in SkFontStream
      previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
      filename src/core/SkFontStream.h
                   * Return the number of shared directories.
      ed268bfed3205347a90557c5029f37e90cc01956 19 19
                   * if the stream is a normal sfnt (ttf). If there is an error or
      ed268bfed3205347a90557c5029f37e90cc01956 20 20
                   * no directory is found, return 0.
  """

  @staticmethod
  def _BuildRegion(region_info, commit_info):
    """Creates the Region for ``region_info`` from the collected commit info."""
    author_info = commit_info[region_info['revision']]
    return Region(region_info['start'],
                  region_info['count'],
                  region_info['revision'],
                  author_info['author_name'],
                  author_info['author_email'],
                  author_info['author_time'])

  def __call__(self, output, path, revision):  # pylint:disable=W
    """Parses blame output into a single Blame object.

    Args:
      output: Text output of the git blame command.
      path: File path passed to the Blame constructor.
      revision: Revision passed to the Blame constructor.

    Returns:
      A Blame object holding one Region per region header found in the output,
      or None if the output is empty or the resulting Blame is falsy.
    """
    if not output:
      return None

    blame = Blame(revision, path)
    # Author info is keyed by revision because it is looked up by the revision
    # of each region, and several regions may share one revision.
    commit_info = defaultdict(dict)
    region_info = None
    for line in output.splitlines():
      # Sample: ec3ed6... 2 1 7.
      match = REGION_START_COUNT_PATTERN.match(line)
      if match:
        # A new region header closes the previous region, if any.
        if region_info:
          blame.AddRegion(self._BuildRegion(region_info, commit_info))

        region_info = {'start': int(match.group(2)),
                       'count': int(match.group(3)),
                       'revision': match.group(1)}
        continue

      if not region_info:
        continue

      author_info = commit_info[region_info['revision']]

      # Sample: author test@google.com.
      match = AUTHOR_NAME_PATTERN.match(line)
      if match:
        author_info['author_name'] = match.group(1)
        continue

      # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>.
      match = AUTHOR_MAIL_PATTERN.match(line)
      if match:
        author_info['author_email'] = commit_util.NormalizeEmail(
            match.group(1).replace('<', '').replace('>', ''))
        continue

      # Sample: author-time 1311863160.
      match = AUTHOR_TIME_PATTERN.match(line)
      if match:
        # Kept as the raw string until the 'author-tz' line is seen.
        author_info['author_time'] = match.group(1)
        continue

      # Sample: author-tz +0800.
      match = AUTHOR_TIMEZONE_PATTERN.match(line)
      if match:
        time_zone = time_util.TimeZoneInfo(match.group(1))
        # NOTE(review): datetime.fromtimestamp() interprets the epoch in the
        # time zone of the machine running this code; whether LocalToUTC then
        # yields the real UTC time depends on time_util semantics that are not
        # visible here -- confirm.
        author_info['author_time'] = time_zone.LocalToUTC(
            datetime.fromtimestamp(int(author_info['author_time'])))

    # The last region has no following header to close it.
    if region_info:
      blame.AddRegion(self._BuildRegion(region_info, commit_info))

    return blame if blame else None
140
141
def GetChangeType(initial):
  """Gets the change type based on the initial character of a git status.

  Args:
    initial: Git status field such as 'M', 'A', 'D' or 'R078'; only its first
      character is used.

  Returns:
    The matching value of INITIAL_TO_CHANGE_TYPE, or None if the status is
    empty or its first character is not a key of that mapping.
  """
  # Slicing instead of indexing, so that an empty status gives None rather
  # than raising IndexError.
  return INITIAL_TO_CHANGE_TYPE.get(initial[:1])
145
146
def GetFileChangeInfo(change_type, path1, path2):
  """Builds the change info (change type, old path, new path) of a file.

  Args:
    change_type: Change type name, compared case-insensitively: 'modify',
      'add', 'delete', 'rename' or 'copy'. May be None.
    path1: The only path for modify/add/delete, the old path for rename/copy.
    path2: The new path for rename/copy; unused for the other change types.

  Returns:
    A dict with keys 'change_type' (as passed in), 'old_path' and 'new_path',
    or None if the change type is None/empty or not one of the types above.
  """
  # An unrecognized git status yields no change type; treat it like any other
  # unknown type instead of failing on None.lower().
  if not change_type:
    return None

  kind = change_type.lower()
  if kind == 'modify':
    old_path, new_path = path1, path1
  elif kind == 'add':
    # Stay the same as gitiles.
    old_path, new_path = None, path1
  elif kind == 'delete':
    old_path, new_path = path1, None
  elif kind in ('rename', 'copy'):
    old_path, new_path = path1, path2
  else:
    return None

  return {
      'change_type': change_type,
      'old_path': old_path,
      'new_path': new_path,
  }
179
180
class GitChangeLogParser(GitParser):
  """Parses the git log output of a single commit into a ChangeLog."""

  def __call__(self, output, repo_url):  # pylint:disable=W
    """Parses output of 'git log --pretty=format:<format>'.

    For example:
      Git changelog output is:
        commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
        tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
        parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

        author Test
        author-mail test@google.com
        author-time 2016-10-24 22:21:45

        committer Test
        committer-mail test@google.com
        committer-time 2016-10-24 22:25:45

        --Message start--
        Commit messages...
        --Message end--

        :100644 100644 25f95f c766f1 M      src/a/delta/git_parsers.py

    Args:
      output: Text output of the git log command for one commit.
      repo_url: Url of the repository, used to build the commit url.

    Returns:
      Parsed ChangeLog object, or None if the output is empty or contains no
      'commit <hash>' line.
    """
    if not output:
      return None

    is_message_line = False
    info = {'message': '', 'touched_files': []}
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
        continue

      if MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Remove the added '\n' at the end.
        info['message'] = info['message'][:-1]
        continue

      # Message lines are taken verbatim, even if they look like field lines.
      if is_message_line:
        info['message'] += line + '\n'
        continue

      match = COMMIT_HASH_PATTERN.match(line)
      if match:
        info['revision'] = match.group(1)
        continue

      match = AUTHOR_NAME_PATTERN.match(line)
      if match:
        info['author_name'] = match.group(1)
        continue

      match = AUTHOR_MAIL_PATTERN.match(line)
      if match:
        info['author_email'] = commit_util.NormalizeEmail(match.group(1))
        continue

      match = AUTHOR_TIME_PATTERN.match(line)
      if match:
        info['author_time'] = datetime.strptime(match.group(1),
                                                DATETIME_FORMAT)
        continue

      match = COMMITTER_NAME_PATTERN.match(line)
      if match:
        info['committer_name'] = match.group(1)
        continue

      match = COMMITTER_MAIL_PATTERN.match(line)
      if match:
        info['committer_email'] = commit_util.NormalizeEmail(match.group(1))
        continue

      match = COMMITTER_TIME_PATTERN.match(line)
      if match:
        info['committer_time'] = datetime.strptime(match.group(1),
                                                   DATETIME_FORMAT)
        continue

      # For modify, add, delete, the pattern is like:
      # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn
      # For rename, copy, the pattern is like:
      # :100644 100644 3f2e 20a5 R078 path1 path2
      match = (CHANGED_FILE_PATTERN1.match(line) or
               CHANGED_FILE_PATTERN2.match(line))
      if not match:
        continue

      status = match.group(5)
      change_type = GetChangeType(status) if status else None
      if change_type is None:
        # Status letters without a known change type (for example 'T') are
        # skipped rather than failing the whole changelog.
        continue

      # Only the rename/copy pattern has a 7th group (the new path).
      new_path = match.group(7) if len(match.groups()) >= 7 else None
      info['touched_files'].append(
          GetFileChangeInfo(change_type, match.group(6), new_path))

    # Without a 'commit <hash>' line there is no changelog to build.
    if 'revision' not in info:
      return None

    info['commit_position'], info['code_review_url'] = (
        commit_util.ExtractCommitPositionAndCodeReviewUrl(info['message']))
    info['reverted_revision'] = commit_util.GetRevertedRevision(
        info['message'])
    info['commit_url'] = '%s/+/%s' % (repo_url, info['revision'])

    return ChangeLog.FromDict(info)
272
273
class GitChangeLogsParser(GitParser):
  """Parses git log output covering several commits into ChangeLogs."""

  def __call__(self, output, repo_url):  # pylint:disable=W
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    For example:
      The output is:
        **Changelog start**
        commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
        tree 27b0421273ed4aea25e497c6d26d9c7db6481852
        parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

        author author1
        author-mail author1@chromium.org
        author-time 2016-10-24 22:21:45

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 2016-10-24 22:23:45

        --Message start--
        Message 1
        --Message end--

        :100644 100644 28e117 f12d3 M      tools/win32.txt


        **Changelog start**
        commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
        tree d22d3786e135b83183cfeba5f3d8913959f56299
        parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

        author author2
        author-mail author2@chromium.org
        author-time 2016-10-24 22:22:45

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 2016-10-24 22:23:45

        --Message start--
        Message2
        --Message end--

        :100644 100644 7280f df186 M      tools/perf/benchmarks/memory_infra.py

    Args:
      output: Text output of the git log command.
      repo_url: Url of the repository, forwarded to GitChangeLogParser.

    Returns:
      A list of parsed ChangeLog objects, or None if the output is empty.
    """
    if not output:
      return None

    git_changelog_parser = GitChangeLogParser()
    changelogs = []
    pending_lines = []

    def _FlushPending():
      """Parses the lines collected so far and keeps a truthy result."""
      if not pending_lines:
        return
      change_log = git_changelog_parser(
          '\n'.join(pending_lines) + '\n', repo_url)
      if change_log:
        changelogs.append(change_log)
      del pending_lines[:]

    for line in output.splitlines():
      if CHANGELOG_START_PATTERN.match(line):
        # A start marker closes the changelog collected before it, if any.
        _FlushPending()
      else:
        pending_lines.append(line)

    # The last changelog has no following start marker to close it.
    _FlushPending()

    return changelogs
347
348
class GitDiffParser(GitParser):
  """Passes the text output of a git diff through without parsing it."""

  def __call__(self, output):
    """Returns the raw text output of 'git log --format="" --max-count=1'.

    For example:
      The output is like:

      diff --git a/chrome/print_header.js b/chrome/print_header.js
      index 51f25e7..4eec37f 100644
      --- a/chrome/browser/resources/print_preview/print_header.js
      +++ b/chrome/browser/resources/print_preview/print_header.js
      @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
             var html;
             var label;
             if (numPages != numSheets) {
      -        html = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                       '<b>' + numSheets + '</b>',
      -                                       '<b>' + summaryLabel + '</b>',
      -                                       numPages,
      -                                       pagesLabel);
      +        html = loadTimeData.getStringF(
      +            'printPreviewSummaryFormatLong',
      +            '<b>' + numSheets.toLocaleString() + '</b>',
      +            '<b>' + summaryLabel + '</b>',
      +            numPages.toLocaleString(),
      +            pagesLabel);
               label = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      -                                        numSheets, summaryLabel,
      -                                        numPages, pagesLabel);
      +                                        numSheets.toLocaleString(),
      +                                        summaryLabel,
      +                                        numPages.toLocaleString(),
      +                                        pagesLabel);
             } else {
      -        html = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      -                                       '<b>' + numSheets + '</b>',
      -                                       '<b>' + summaryLabel + '</b>');
      +        html = loadTimeData.getStringF(
      +            'printPreviewSummaryFormatShort',
      +            '<b>' + numSheets.toLocaleString() + '</b>',
      +            '<b>' + summaryLabel + '</b>');
               label = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      -                                        numSheets, summaryLabel);
      +                                        numSheets.toLocaleString(),
      +                                        summaryLabel);
             }

    Args:
      output: Text output of the git command.

    Returns:
      ``output`` unchanged, or None if it is empty.
    """
    return output or None
398
399
class GitSourceParser(GitParser):
  """Passes the text of a file source through without parsing it."""

  def __call__(self, output):
    """Returns the raw text of a file source from 'git show <rev>:<file>'.

    Args:
      output: Text output of the git command.

    Returns:
      ``output`` unchanged, or None if it is empty.
    """
    return output or None
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698