Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2219)

Unified Diff: appengine/findit/common/local_git_parsers.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Fix nits. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/common/local_git_parsers.py
diff --git a/appengine/findit/common/local_git_parsers.py b/appengine/findit/common/local_git_parsers.py
new file mode 100644
index 0000000000000000000000000000000000000000..e40a0ad2f999fdf2f270091a7845b9895cada736
--- /dev/null
+++ b/appengine/findit/common/local_git_parsers.py
@@ -0,0 +1,400 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Parse output of local git commands into Gitiles response format."""
+
+from collections import defaultdict
+from datetime import datetime
+from datetime import timedelta
+from datetime import time
+from datetime import tzinfo
+import re
+
+from common import repo_util
+from common.blame import Blame
+from common.blame import Region
+from common.change_log import ChangeLog
+from common.change_log import FileChangeInfo
+
# All patterns below are applied to one line of git output at a time and are
# anchored at the start of that line.

# First line of a blame region in 'git blame --porcelain' output, e.g.
# 'ed268bfed3205347a90557c5029f37e90cc01956 18 18 3'.
# Groups: (commit, second number, third number); GitBlameParser passes the two
# numbers to Region as its first and second arguments.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# Author lines, e.g. 'author Test', 'author-mail <test@google.com>',
# 'author-time 1363032816' and 'author-tz +0000'.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (\d+)')
AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)')

# Committer lines, same layout as the author lines.
COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (\d+)')

# Path line of a blame header, e.g. 'filename src/core/SkFontStream.h'.
FILENAME_PATTERN = re.compile(r'^filename (\S+)')

# Revision line of a changelog, e.g. 'commit 21a8979218c096f4a96b07b67c95...'.
COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers wrapped around the commit message in the changelog format.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# Changed-file line with a single path (M, A, D), e.g.
# ':100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn'.
# Groups: (mode, mode, hash, hash, status letter, path).
CHANGED_FILE_PATTERN1 = re.compile(
    r'^:(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# Changed-file line with two paths (R, C), e.g.
# ':100644 100644 3f2e 20a5 R078 path1 path2'.
# Groups: (mode, mode, hash, hash, status with score, path1, path2).
CHANGED_FILE_PATTERN2 = re.compile(
    r'^:(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Marker that precedes every commit in multi-commit changelog output.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')

# Maps the first letter of a changed-file status to the change type name.
INITIAL_TO_CHANGE_TYPE = {
    'M': 'modify',
    'A': 'add',
    'D': 'delete',
    'C': 'copy',
    'R': 'rename',
}
+
+
class TimeZoneInfo(tzinfo):
  """Fixed-offset time zone built from a string like '+0800' or '-0530'.

  The offset never changes with the date, so dst() is always zero and
  tzname() is simply the string the instance was created from.
  """

  # Optional sign followed by exactly two hour digits and two minute digits.
  _OFFSET_PATTERN = re.compile(r'^([+-]?)(\d{2})(\d{2})\Z')

  def __init__(self, offset_str):
    """Creates the time zone described by offset_str.

    Args:
      offset_str (str): UTC offset formatted as [+-]HHMM. A missing sign is
          treated as '+'.

    Raises:
      ValueError: If offset_str is not formatted as [+-]HHMM.
    """
    super(TimeZoneInfo, self).__init__()
    self._offset = timedelta(minutes=self._ParseOffsetMinutes(offset_str))
    self._tzname = offset_str

  @classmethod
  def _ParseOffsetMinutes(cls, offset_str):
    """Returns the signed number of minutes that offset_str represents."""
    match = cls._OFFSET_PATTERN.match(offset_str)
    if not match:
      # Slicing a malformed string (e.g. '-800') would yield a wrong offset
      # without any error, so reject it explicitly.
      raise ValueError('Invalid time zone offset: %r' % (offset_str,))

    sign, hours, minutes = match.groups()
    total_minutes = int(hours) * 60 + int(minutes)
    return -total_minutes if sign == '-' else total_minutes

  def utcoffset(self, dt=None):  # pylint: disable=W0613
    """Returns the fixed offset from UTC as a timedelta; dt is ignored."""
    return self._offset

  def tzname(self, dt=None):  # pylint: disable=W0613
    """Returns the offset string this time zone was built from."""
    return self._tzname

  def dst(self, dt=None):  # pylint: disable=W0613
    """Returns a zero timedelta; a fixed offset has no daylight saving."""
    return timedelta(0)

  def LocalToUTC(self, naive_time):
    """Localizes naive datetime and converts it to utc naive datetime.

    Args:
      naive_time (datetime): Naive datetime expressed in this time zone.

    Returns:
      naive_time with this zone's UTC offset subtracted.
    """
    return naive_time - self.utcoffset()
+
+
class GitParser(object):
  """Base class of the parsers for the output of local git commands.

  A parser is a callable: subclasses override __call__ to turn the raw text
  printed by one git command into the corresponding parsed result.
  """

  def __call__(self, output):
    """Parses the raw output of a git command.

    Args:
      output (str): Text printed by the git command.

    Raises:
      NotImplementedError: Always; subclasses must override this method.
    """
    raise NotImplementedError(
        '%s must implement __call__.' % type(self).__name__)
+
+
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
    Git blame output of a Region is:
      ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
      author test@google.com
      author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      author-time 1363032816
      author-tz +0000
      committer test@google.com
      committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
      committer-time 1363032816
      committer-tz +0000
      summary add (mac) test for ttcindex in SkFontStream
      previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
      filename src/core/SkFontStream.h
           * Return the number of shared directories.
      ed268bfed3205347a90557c5029f37e90cc01956 19 19
           * if the stream is a normal sfnt (ttf). If there is an error or
      ed268bfed3205347a90557c5029f37e90cc01956 20 20
           * no directory is found, return 0.
  """

  def __call__(self, output):
    """Groups the blamed regions found in output by commit.

    Args:
      output (str): Raw output of 'git blame --porcelain'.

    Returns:
      A list of parsed Blame objects, one for every commit that owns at least
      one region.

    Raises:
      KeyError: If a commit that owns a region has no author name, author
          mail, author time or filename line in output.
    """
    commit_to_regions = defaultdict(list)
    commit_infos = defaultdict(dict)
    curr_commit = None
    for line in output.splitlines():
      # Sample: "ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976e 2 1 7"
      region_match = REGION_START_COUNT_PATTERN.match(line)
      if region_match:
        curr_commit = region_match.group(1)
        commit_to_regions[curr_commit].append(
            Region(int(region_match.group(2)),
                   int(region_match.group(3)),
                   curr_commit))
      elif curr_commit:
        # Header lines are attributed to the commit of the latest region.
        self._UpdateCommitInfo(line, commit_infos[curr_commit])

    blames = []
    for commit, regions in commit_to_regions.items():
      info = commit_infos[commit]
      for region in regions:
        region.author_name = info['author_name']
        region.author_email = info['author_email']
        region.author_time = info['author_time']
      blames.append(Blame(commit, info['path'], regions))

    return blames

  @staticmethod
  def _UpdateCommitInfo(line, info):
    """Stores the field carried by a header line, if any, into info.

    Args:
      line (str): One line of blame output that does not start a region.
      info (dict): Fields collected so far for the current commit; updated in
          place with 'author_name', 'author_email', 'author_time' or 'path'.
    """
    # Sample: "author test@google.com"
    match = AUTHOR_NAME_PATTERN.match(line)
    if match:
      info['author_name'] = match.group(1)
      return

    # Sample: "author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>"
    match = AUTHOR_MAIL_PATTERN.match(line)
    if match:
      info['author_email'] = repo_util.NormalizeEmail(
          match.group(1).replace('<', '').replace('>', ''))
      return

    # Sample: "author-time 1311863160"
    match = AUTHOR_TIME_PATTERN.match(line)
    if match:
      # author-time is seconds since the Unix epoch, so it converts directly
      # to a naive UTC datetime. The accompanying author-tz line only records
      # the author's local offset and must not be applied on top of it.
      info['author_time'] = datetime.utcfromtimestamp(int(match.group(1)))
      return

    # Sample: "filename src/core/SkFontStream.h"
    match = FILENAME_PATTERN.match(line)
    if match:
      info['path'] = match.group(1)
+
+
def GetChangeType(initial):
  """Gets the change type based on the initial character of a status.

  Args:
    initial (str): Status field of a changed-file line, e.g. 'M' or 'R078'.

  Returns:
    The change type that INITIAL_TO_CHANGE_TYPE maps the first character to,
    or None if the status is empty or its first character is unknown.
  """
  # Slicing instead of indexing keeps an empty status from raising IndexError.
  return INITIAL_TO_CHANGE_TYPE.get(initial[:1])
lijeffrey 2016/10/20 23:31:32 why not inline initial[0]?
Sharu Jiang 2016/10/21 01:07:42 Done.
+
+
def GetFileChangeInfo(change_type, path1, path2):
  """Builds the FileChangeInfo for one changed file.

  Args:
    change_type (str): One of 'modify', 'add', 'delete', 'rename' or 'copy',
        compared case-insensitively. May be None.
    path1 (str): First (or only) path of the changed-file line.
    path2 (str): Second path of the changed-file line; only used for rename
        and copy.

  Returns:
    A FileChangeInfo built from the lower-cased change type and the two paths
    that apply to it (presumably the old and the new path, in that order), or
    None if change_type is empty or not recognized.
  """
  if not change_type:
    return None

  change_type = change_type.lower()
  paths_by_change_type = {
      'modify': (path1, path1),
      # Stay the same as gitiles: an added file has no first path and a
      # deleted file has no second path.
      'add': (None, path1),
      'delete': (path1, None),
      'rename': (path1, path2),
      'copy': (path1, path2),
  }
  paths = paths_by_change_type.get(change_type)
  if paths is None:
    return None

  return FileChangeInfo(change_type, paths[0], paths[1])
+
+
class GitChangeLogParser(GitParser):
  """Parses the formatted git log of a single commit into a ChangeLog."""

  def __call__(self, output):
    """Parses output of 'git log --pretty=format:<format>.

    For example:
      Git changelog output is:
        commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
        tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
        parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

        author Test
        author-mail test@google.com
        author-time 1468442226

        committer Test
        committer-mail test@google.com
        committer-time 1468442226

        --Message start--
        Commit messages...
        --Message end--

        :100644 100644 25f95f c766f1 M src/a/delta/git_parsers.py

    Args:
      output (str): Formatted git log output of one commit.

    Returns:
      Parsed ChangeLog object, or None if output has no 'commit <hash>' line.
    """
    is_message_line = False
    change_log = ChangeLog(message='', touched_files=[])
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
      elif MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Every message line is stored with a trailing newline; drop the one
        # that follows the last line of the message.
        if change_log.message.endswith('\n'):
          change_log.message = change_log.message[:-1]
      elif is_message_line:
        change_log.message += line + '\n'
      else:
        self._ParseMetadataLine(line, change_log)

    # If commit is not parsed, the changelog will be {'author': {}, 'committer':
    # {}, 'message': ''}, return None instead.
    if not change_log.revision:
      return None

    change_log.commit_position, change_log.code_review_url = (
        repo_util.ExtractCommitPositionAndCodeReviewUrl(change_log.message))
    change_log.reverted_revision = repo_util.GetRevertedRevision(
        change_log.message)

    return change_log

  @staticmethod
  def _ParseMetadataLine(line, change_log):
    """Stores the field carried by a non-message line into change_log.

    Args:
      line (str): One line of output that is outside the commit message.
      change_log (ChangeLog): Object being filled in; updated in place. Lines
          that match no known pattern leave it untouched.
    """
    match = COMMIT_HASH_PATTERN.match(line)
    if match:
      change_log.revision = match.group(1)
      return

    match = AUTHOR_NAME_PATTERN.match(line)
    if match:
      change_log.author_name = match.group(1)
      return

    match = AUTHOR_MAIL_PATTERN.match(line)
    if match:
      change_log.author_email = repo_util.NormalizeEmail(match.group(1))
      return

    match = AUTHOR_TIME_PATTERN.match(line)
    if match:
      change_log.author_time = datetime.utcfromtimestamp(int(match.group(1)))
      return

    match = COMMITTER_NAME_PATTERN.match(line)
    if match:
      change_log.committer_name = match.group(1)
      return

    match = COMMITTER_MAIL_PATTERN.match(line)
    if match:
      change_log.committer_email = repo_util.NormalizeEmail(match.group(1))
      return

    match = COMMITTER_TIME_PATTERN.match(line)
    if match:
      change_log.committer_time = datetime.utcfromtimestamp(
          int(match.group(1)))
      return

    # For modify, add, delete, the pattern is like:
    # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn
    # For rename, copy, the pattern is like:
    # :100644 100644 3f2e 20a5 R078 path1 path2
    match = (CHANGED_FILE_PATTERN1.match(line) or
             CHANGED_FILE_PATTERN2.match(line))
    if match:
      groups = match.groups()
      # Only the rename/copy pattern captures a second path.
      second_path = groups[6] if len(groups) > 6 else None
      file_change = GetFileChangeInfo(
          GetChangeType(groups[4]), groups[5], second_path)
      # Statuses outside INITIAL_TO_CHANGE_TYPE give no FileChangeInfo; skip
      # them rather than storing None among the touched files.
      if file_change is not None:
        change_log.touched_files.append(file_change)
+
+
class GitChangeLogsParser(GitParser):
  """Parses the formatted git log of a range of commits into ChangeLogs."""

  def __call__(self, output):
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    For example:
      The output is:
        **Changelog start**
        commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
        tree 27b0421273ed4aea25e497c6d26d9c7db6481852
        parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

        author author1
        author-mail author1@chromium.org
        author-time 1464864938

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 1464865033

        --Message start--
        Message 1
        --Message end--

        :100644 100644 28e117 f12d3 M tools/win32.txt


        **Changelog start**
        commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
        tree d22d3786e135b83183cfeba5f3d8913959f56299
        parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

        author author2
        author-mail author2@chromium.org
        author-time 1464864783

        committer Commit bot
        committer-mail commit-bot@chromium.org
        committer-time 1464864854

        --Message start--
        Message2
        --Message end--

        :100644 100644 7280f df186 M tools/perf/benchmarks/memory_infra.py

    Args:
      output (str): Formatted git log output of zero or more commits.

    Returns:
      A list of parsed ChangeLog objects, in the order they appear in output.
      Sections that GitChangeLogParser cannot parse are left out.
    """
    # Split output into one list of lines per '**Changelog start**' marker.
    # Collecting lines and joining once per section avoids rebuilding an
    # ever-growing string for every line of the log.
    sections = [[]]
    for line in output.splitlines():
      if CHANGELOG_START_PATTERN.match(line):
        sections.append([])
      else:
        sections[-1].append(line)

    git_changelog_parser = GitChangeLogParser()
    changelogs = []
    for section_lines in sections:
      if not section_lines:
        # Nothing between two markers, or before the first one.
        continue

      change_log = git_changelog_parser('\n'.join(section_lines) + '\n')
      if change_log:
        changelogs.append(change_log)

    return changelogs
+
+
class GitDiffParser(GitParser):
  """Pass-through parser for the diff text of a commit.

  The raw text output of 'git log --format="" --max-count=1' is already the
  wanted result, so it is handed back unchanged.

  For example:
    The output is like:

      diff --git a/chrome/print_header.js b/chrome/print_header.js
      index 51f25e7..4eec37f 100644
      --- a/chrome/browser/resources/print_preview/print_header.js
      +++ b/chrome/browser/resources/print_preview/print_header.js
      @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
       var html;
       var label;
       if (numPages != numSheets) {
      - html = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      - '<b>' + numSheets + '</b>',
      - '<b>' + summaryLabel + '</b>',
      - numPages,
      - pagesLabel);
      + html = loadTimeData.getStringF(
      + 'printPreviewSummaryFormatLong',
      + '<b>' + numSheets.toLocaleString() + '</b>',
      + '<b>' + summaryLabel + '</b>',
      + numPages.toLocaleString(),
      + pagesLabel);
       label = loadTimeData.getStringF('printPreviewSummaryFormatLong',
      - numSheets, summaryLabel,
      - numPages, pagesLabel);
      + numSheets.toLocaleString(),
      + summaryLabel,
      + numPages.toLocaleString(),
      + pagesLabel);
       } else {
      - html = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      - '<b>' + numSheets + '</b>',
      - '<b>' + summaryLabel + '</b>');
      + html = loadTimeData.getStringF(
      + 'printPreviewSummaryFormatShort',
      + '<b>' + numSheets.toLocaleString() + '</b>',
      + '<b>' + summaryLabel + '</b>');
       label = loadTimeData.getStringF('printPreviewSummaryFormatShort',
      - numSheets, summaryLabel);
      + numSheets.toLocaleString(),
      + summaryLabel);
       }
  """

  def __call__(self, output):
    """Returns output, the raw diff text, without any change."""
    raw_diff = output
    return raw_diff
+
+
class GitSourceParser(GitParser):
  """Pass-through parser for the content of a file at a revision."""

  def __call__(self, output):
    """Hands back the text printed by 'git show <rev>:<file>' unchanged.

    Args:
      output (str): Raw text of the file source.

    Returns:
      The same text that was passed in.
    """
    file_source = output
    return file_source

Powered by Google App Engine
This is Rietveld 408576698