Chromium Code Reviews| Index: appengine/findit/common/local_git_parsers.py |
| diff --git a/appengine/findit/common/local_git_parsers.py b/appengine/findit/common/local_git_parsers.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..e40a0ad2f999fdf2f270091a7845b9895cada736 |
| --- /dev/null |
| +++ b/appengine/findit/common/local_git_parsers.py |
| @@ -0,0 +1,400 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Parse output of local git commands into Gitiles response format.""" |
| + |
| +from collections import defaultdict |
| +from datetime import datetime |
| +from datetime import timedelta |
| +from datetime import time |
| +from datetime import tzinfo |
| +import re |
| + |
| +from common import repo_util |
| +from common.blame import Blame |
| +from common.blame import Region |
| +from common.change_log import ChangeLog |
| +from common.change_log import FileChangeInfo |
| + |
| +REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)') |
| + |
| +AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)') |
| +AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)') |
| +AUTHOR_TIME_PATTERN = re.compile(r'^author-time (\d+)') |
| +AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)') |
| + |
| +COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)') |
| +COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)') |
| +COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (\d+)') |
| + |
| +FILENAME_PATTERN = re.compile(r'filename (\S+)') |
| + |
| +COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)') |
| + |
| +MESSAGE_START_PATTERN = re.compile(r'^--Message start--') |
| +MESSAGE_END_PATTERN = re.compile(r'^--Message end--') |
| + |
| +# Matches changed-file lines for modify (M), add (A) and delete (D). |
| +CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)') |
| +# Matches changed-file lines for rename (R) and copy (C): two paths. |
| +CHANGED_FILE_PATTERN2 = re.compile( |
| + r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)') |
| + |
| +CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*') |
| + |
| +INITIAL_TO_CHANGE_TYPE = { |
| + 'M': 'modify', |
| + 'A': 'add', |
| + 'D': 'delete', |
| + 'C': 'copy', |
| + 'R': 'rename' |
| +} |
| + |
| + |
| +class TimeZoneInfo(tzinfo): |
| + """Gets time zone info from string like: +0800.""" |
| + |
| + def __init__(self, offset_str): |
| + super(TimeZoneInfo, self).__init__() |
| + offset = int(offset_str[-4:-2]) * 60 + int(offset_str[-2:]) |
|
lijeffrey
2016/10/20 23:31:31
is it possible to move offset manipulation to a se
Sharu Jiang
2016/10/21 01:07:42
Done.
|
| + if offset_str[0] == '-': |
| + offset = -offset |
| + self._offset = timedelta(minutes=offset) |
| + self._tzname = offset_str |
| + |
| + def utcoffset(self, dt=None): # pylint: disable=W0613 |
| + return self._offset |
| + |
| + def tzname(self, dt=None): # pylint: disable=W0613 |
| + return self._tzname |
| + |
| + def dst(self, dt=None): # pylint: disable=W0613 |
| + return timedelta(0) |
| + |
| + def LocalToUTC(self, naive_time): |
| + """Converts a naive datetime in this time zone to a naive UTC datetime.""" |
| + return naive_time - self.utcoffset() |
| + |
| + |
| +class GitParser(object): |
| + |
| + def __call__(self, output): |
| + raise NotImplementedError() |
| + |
| + |
| +class GitBlameParser(GitParser): |
| + """Parses output of 'git blame --porcelain <rev> <file_path>'. |
| + |
| + For example: |
| + Git blame output of a Region is: |
| + ed268bfed3205347a90557c5029f37e90cc01956 18 18 3 |
| + author test@google.com |
| + author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> |
| + author-time 1363032816 |
| + author-tz +0000 |
| + committer test@google.com |
| + committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> |
| + committer-time 1363032816 |
| + committer-tz +0000 |
| + summary add (mac) test for ttcindex in SkFontStream |
| + previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h |
| + filename src/core/SkFontStream.h |
| + * Return the number of shared directories. |
| + ed268bfed3205347a90557c5029f37e90cc01956 19 19 |
| + * if the stream is a normal sfnt (ttf). If there is an error or |
| + ed268bfed3205347a90557c5029f37e90cc01956 20 20 |
| + * no directory is found, return 0. |
| + |
| + Returns: |
| + A list of parsed Blame objects. |
| + """ |
| + def __call__(self, output): |
| + commit_to_regions = defaultdict(list) |
| + commit_infos = defaultdict(dict) |
| + curr_commit = None |
| + for line in output.splitlines(): |
| + # Sample: ec3ed6a5ebf6f2c406d7bcf94b6bc34fcaeb976e 2 1 7 |
|
lijeffrey
2016/10/20 23:31:31
pylint will complain about these. how about "ec3ed
Sharu Jiang
2016/10/21 01:07:42
Done.
|
| + if REGION_START_COUNT_PATTERN.match(line): |
| + match = REGION_START_COUNT_PATTERN.match(line) |
| + curr_commit = match.group(1) |
| + commit_to_regions[curr_commit].append(Region(int(match.group(2)), |
| + int(match.group(3)), |
| + match.group(1))) |
| + elif curr_commit: |
| + # Sample: author test@google.com |
| + if AUTHOR_NAME_PATTERN.match(line): |
| + commit_infos[curr_commit]['author_name'] = ( |
| + AUTHOR_NAME_PATTERN.match(line).group(1)) |
| + # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81> |
| + elif AUTHOR_MAIL_PATTERN.match(line): |
| + commit_infos[curr_commit]['author_email'] = repo_util.NormalizeEmail( |
| + AUTHOR_MAIL_PATTERN.match(line).group(1).replace( |
| + '<', '').replace('>', '')) |
| + # Sample: author-time 1311863160 |
| + elif AUTHOR_TIME_PATTERN.match(line): |
| + commit_infos[curr_commit]['author_time'] = ( |
| + AUTHOR_TIME_PATTERN.match(line).group(1)) |
| + # Sample: author-tz +0800 |
| + elif AUTHOR_TIMEZONE_PATTERN.match(line): |
| + time_zone = TimeZoneInfo(AUTHOR_TIMEZONE_PATTERN.match(line).group(1)) |
| + commit_infos[curr_commit]['author_time'] = time_zone.LocalToUTC( |
| + datetime.fromtimestamp( |
| + int(commit_infos[curr_commit]['author_time']))) |
| + |
| + elif FILENAME_PATTERN.match(line): |
| + commit_infos[curr_commit]['path'] = FILENAME_PATTERN.match( |
| + line).group(1) |
| + |
| + blames = [] |
| + for commit, regions in commit_to_regions.iteritems(): |
| + for region in regions: |
| + region.author_name = commit_infos[commit]['author_name'] |
| + region.author_email = commit_infos[commit]['author_email'] |
| + region.author_time = commit_infos[commit]['author_time'] |
| + blames.append(Blame(commit, commit_infos[commit]['path'], regions)) |
| + |
| + return blames |
| + |
| + |
| +def GetChangeType(initial): |
| + """Gets change type based on the initial character.""" |
|
lijeffrey
2016/10/20 23:31:31
character?
Sharu Jiang
2016/10/21 01:07:42
Oops.
|
| + initial = initial[0] |
| + return INITIAL_TO_CHANGE_TYPE.get(initial) |
|
lijeffrey
2016/10/20 23:31:32
why not inline initial[0]?
Sharu Jiang
2016/10/21 01:07:42
Done.
|
| + |
| + |
| +def GetFileChangeInfo(change_type, path1, path2): |
| + """Returns a FileChangeInfo with old/new paths set for the change type.""" |
| + if change_type == 'modify': |
|
lijeffrey
2016/10/20 23:31:31
use .lower() before comparing strings
Sharu Jiang
2016/10/21 01:07:42
Done.
|
| + return FileChangeInfo(change_type, path1, path1) |
| + |
| + if change_type == 'add': |
| + # Stay consistent with Gitiles. |
| + return FileChangeInfo(change_type, None, path1) |
| + |
| + if change_type == 'delete': |
| + return FileChangeInfo(change_type, path1, None) |
| + |
| + if change_type == 'rename' or change_type == 'copy': |
| + return FileChangeInfo(change_type, path1, path2) |
| + |
| + return None |
| + |
| + |
| +class GitChangeLogParser(GitParser): |
| + |
| + def __call__(self, output): |
| + """Parses output of 'git log --pretty=format:<format>'. |
| + |
| + For example: |
| + Git changelog output is: |
| + commit 21a8979218c096f4a96b07b67c9531f5f09e28a3 |
| + tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db |
| + parents 9640406d426a2d153b16e1d9ae7f9105268b36c9 |
| + |
| + author Test |
| + author-mail test@google.com |
| + author-time 1468442226 |
| + |
| + committer Test |
| + committer-mail test@google.com |
| + committer-time 1468442226 |
| + |
| + --Message start-- |
| + Commit messages... |
| + --Message end-- |
| + |
| + :100644 100644 25f95f c766f1 M src/a/delta/git_parsers.py |
| + |
| + Returns: |
| + Parsed ChangeLog object. |
| + """ |
| + is_message_line = False |
| + change_log = ChangeLog(message='', touched_files=[]) |
| + for line in output.splitlines(): |
| + if MESSAGE_START_PATTERN.match(line): |
| + is_message_line = True |
| + continue |
| + |
| + if MESSAGE_END_PATTERN.match(line): |
| + is_message_line = False |
| + # Drop the trailing '\n' appended after the last message line. |
| + change_log.message = change_log.message[:-1] |
|
lijeffrey
2016/10/20 23:31:32
nit: is it safer to first check the last character
Sharu Jiang
2016/10/21 01:07:42
Since this '\n' is manually added at last, so no n
|
| + continue |
| + |
| + if is_message_line: |
| + change_log.message += line + '\n' |
| + elif COMMIT_HASH_PATTERN.match(line): |
| + change_log.revision = COMMIT_HASH_PATTERN.match(line).group(1) |
| + elif AUTHOR_NAME_PATTERN.match(line): |
| + change_log.author_name = AUTHOR_NAME_PATTERN.match(line).group(1) |
| + elif AUTHOR_MAIL_PATTERN.match(line): |
| + change_log.author_email = repo_util.NormalizeEmail( |
| + AUTHOR_MAIL_PATTERN.match(line).group(1)) |
| + elif AUTHOR_TIME_PATTERN.match(line): |
| + change_log.author_time = datetime.utcfromtimestamp( |
| + int(AUTHOR_TIME_PATTERN.match(line).group(1))) |
| + elif COMMITTER_NAME_PATTERN.match(line): |
| + change_log.committer_name = ( |
| + COMMITTER_NAME_PATTERN.match(line).group(1)) |
| + elif COMMITTER_MAIL_PATTERN.match(line): |
| + change_log.committer_email = repo_util.NormalizeEmail( |
| + COMMITTER_MAIL_PATTERN.match(line).group(1)) |
| + elif COMMITTER_TIME_PATTERN.match(line): |
| + change_log.committer_time = datetime.utcfromtimestamp( |
| + int(COMMITTER_TIME_PATTERN.match(line).group(1))) |
| + elif (CHANGED_FILE_PATTERN1.match(line) or |
| + CHANGED_FILE_PATTERN2.match(line)): |
| + match = (CHANGED_FILE_PATTERN1.match(line) or |
| + CHANGED_FILE_PATTERN2.match(line)) |
| + # For modify, add, delete, the pattern is like: |
| + # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn |
| + # For rename, copy, the pattern is like: |
| + # :100644 100644 3f2e 20a5 R078 path1 path2 |
| + change_log.touched_files.append( |
| + GetFileChangeInfo(GetChangeType(match.group(5)), |
| + match.group(6), |
| + None if len(match.groups()) < 7 |
| + else match.group(7))) |
| + |
| + # If commit is not parsed, the changelog will be {'author': {}, 'committer': |
| + # {}, 'message': ''}, return None instead. |
| + if not change_log.revision: |
| + return None |
| + |
| + change_log.commit_position, change_log.code_review_url = ( |
| + repo_util.ExtractCommitPositionAndCodeReviewUrl(change_log.message)) |
| + change_log.reverted_revision = repo_util.GetRevertedRevision( |
| + change_log.message) |
| + |
| + return change_log |
| + |
| + |
| +class GitChangeLogsParser(GitParser): |
| + |
| + def __call__(self, output): |
| + """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'. |
| + |
| + For example: |
| + The output is: |
| + **Changelog start** |
| + commit 9af040a364c15bdc2adeea794e173a2c529a3ddc |
| + tree 27b0421273ed4aea25e497c6d26d9c7db6481852 |
| + parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e |
| + |
| + author author1 |
| + author-mail author1@chromium.org |
| + author-time 1464864938 |
| + |
| + committer Commit bot |
| + committer-mail commit-bot@chromium.org |
| + committer-time 1464865033 |
| + |
| + --Message start-- |
| + Message 1 |
| + --Message end-- |
| + |
| + :100644 100644 28e117 f12d3 M tools/win32.txt |
| + |
| + |
| + **Changelog start** |
| + commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e |
| + tree d22d3786e135b83183cfeba5f3d8913959f56299 |
| + parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a |
| + |
| + author author2 |
| + author-mail author2@chromium.org |
| + author-time 1464864783 |
| + |
| + committer Commit bot |
| + committer-mail commit-bot@chromium.org |
| + committer-time 1464864854 |
| + |
| + --Message start-- |
| + Message2 |
| + --Message end-- |
| + |
| + :100644 100644 7280f df186 M tools/perf/benchmarks/memory_infra.py |
| + |
| + Returns: |
| + A list of parsed ChangeLog objects. |
| + """ |
| + git_changelog_parser = GitChangeLogParser() |
| + |
| + changelog_str = '' |
| + changelogs = [] |
| + for line in output.splitlines(): |
| + if CHANGELOG_START_PATTERN.match(line): |
| + if not changelog_str: |
| + continue |
| + |
| + change_log = git_changelog_parser(changelog_str) |
| + if change_log: |
| + changelogs.append(change_log) |
| + changelog_str = '' |
| + else: |
| + changelog_str += line + '\n' |
| + |
| + change_log = git_changelog_parser(changelog_str) |
| + if change_log: |
| + changelogs.append(change_log) |
| + |
| + return changelogs |
| + |
| + |
| +class GitDiffParser(GitParser): |
| + |
| + def __call__(self, output): |
| + """Returns the raw text output of 'git log --format="" --max-count=1'. |
| + |
| + For example: |
| + The output is like: |
| + |
| + diff --git a/chrome/print_header.js b/chrome/print_header.js |
| + index 51f25e7..4eec37f 100644 |
| + --- a/chrome/browser/resources/print_preview/print_header.js |
| + +++ b/chrome/browser/resources/print_preview/print_header.js |
| + @@ -188,20 +188,25 @@ cr.define('print_preview', function() { |
| + var html; |
| + var label; |
| + if (numPages != numSheets) { |
| + - html = loadTimeData.getStringF('printPreviewSummaryFormatLong', |
| + - '<b>' + numSheets + '</b>', |
| + - '<b>' + summaryLabel + '</b>', |
| + - numPages, |
| + - pagesLabel); |
| + + html = loadTimeData.getStringF( |
| + + 'printPreviewSummaryFormatLong', |
| + + '<b>' + numSheets.toLocaleString() + '</b>', |
| + + '<b>' + summaryLabel + '</b>', |
| + + numPages.toLocaleString(), |
| + + pagesLabel); |
| + label = loadTimeData.getStringF('printPreviewSummaryFormatLong', |
| + - numSheets, summaryLabel, |
| + - numPages, pagesLabel); |
| + + numSheets.toLocaleString(), |
| + + summaryLabel, |
| + + numPages.toLocaleString(), |
| + + pagesLabel); |
| + } else { |
| + - html = loadTimeData.getStringF('printPreviewSummaryFormatShort', |
| + - '<b>' + numSheets + '</b>', |
| + - '<b>' + summaryLabel + '</b>'); |
| + + html = loadTimeData.getStringF( |
| + + 'printPreviewSummaryFormatShort', |
| + + '<b>' + numSheets.toLocaleString() + '</b>', |
| + + '<b>' + summaryLabel + '</b>'); |
| + label = loadTimeData.getStringF('printPreviewSummaryFormatShort', |
| + - numSheets, summaryLabel); |
| + + numSheets.toLocaleString(), |
| + + summaryLabel); |
| + } |
| + """ |
| + return output |
| + |
| + |
| +class GitSourceParser(GitParser): |
| + |
| + def __call__(self, output): |
| + """Returns the raw text of a file source from 'git show <rev>:<file>'.""" |
| + return output |