Chromium Code Reviews| Index: appengine/findit/lib/gitiles/local_git_parsers.py |
| diff --git a/appengine/findit/lib/gitiles/local_git_parsers.py b/appengine/findit/lib/gitiles/local_git_parsers.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..75f59389688fb6019f24af5d3a6d55c7b1eca64a |
| --- /dev/null |
| +++ b/appengine/findit/lib/gitiles/local_git_parsers.py |
| @@ -0,0 +1,394 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
|
stgao
2016/11/02 02:04:20
Why should this file be in lib/gitiles? It is not
wrengr
2016/11/03 17:09:01
Fwiw, it makes sense to me for the file to live in
Sharu Jiang
2016/11/03 20:59:48
I found out that app engine does not allow any wri
wrengr
2016/11/03 21:16:09
I don't think util_script is the right place for t
|
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Parse output of local git commands into Gitiles response format.""" |
| + |
| +from collections import defaultdict |
| +from datetime import datetime |
| +import re |
| + |
| +from common import time_util |
|
wrengr
2016/11/03 17:09:01
Things in ./lib shouldn't depend on things in ./co
Sharu Jiang
2016/11/03 20:59:48
Move the time_util to lib/
|
| +from lib.gitiles.blame import Blame |
| +from lib.gitiles.blame import Region |
| +from lib.gitiles.change_log import ChangeLog |
| +from lib.gitiles import repo_util |
| + |
# Matches a line made of a non-space token followed by three integers.
# GitBlameParser reads group 1 as the revision, group 2 as the region start
# line and group 3 as the region line count; the first integer is not
# captured.
REGION_START_COUNT_PATTERN = re.compile(r'^(\S+) \d+ (\d+) (\d+)')

# strptime format used by GitChangeLogParser for the 'author-time' and
# 'committer-time' lines, e.g. '2016-10-24 22:21:45'.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# Author metadata lines. The name pattern requires a space right after
# 'author', so it does not match the 'author-mail'/'author-time'/'author-tz'
# lines.
AUTHOR_NAME_PATTERN = re.compile(r'^author (.*)')
AUTHOR_MAIL_PATTERN = re.compile(r'^author-mail (\S+)')
AUTHOR_TIME_PATTERN = re.compile(r'^author-time (.+)')
AUTHOR_TIMEZONE_PATTERN = re.compile(r'^author-tz (.*)')

# Committer metadata lines, mirroring the author patterns above.
COMMITTER_NAME_PATTERN = re.compile(r'^committer (.*)')
COMMITTER_MAIL_PATTERN = re.compile(r'^committer-mail (\S+)')
COMMITTER_TIME_PATTERN = re.compile(r'^committer-time (.+)')

# Matches 'commit <hash>'. The mandatory space after 'commit' keeps it from
# matching 'committer ...' lines.
COMMIT_HASH_PATTERN = re.compile(r'^commit (\S+)')

# Markers that delimit the commit message in the changelog output.
MESSAGE_START_PATTERN = re.compile(r'^--Message start--')
MESSAGE_END_PATTERN = re.compile(r'^--Message end--')

# This pattern is for M, A, D: a single status character followed by one path.
CHANGED_FILE_PATTERN1 = re.compile(r':(\d+) (\d+) (\S+) (\S+) (\w)\s+(\S+)')
# This pattern is for R, C: a status such as 'R078' followed by two paths.
CHANGED_FILE_PATTERN2 = re.compile(
    r':(\d+) (\d+) (\S+) (\S+) ([A-Z0-9]*)\s+(\S+)\s(\S+)')

# Marker that precedes every changelog when several are printed together.
CHANGELOG_START_PATTERN = re.compile(r'^\*\*Changelog start\*\*')

# Maps the first character of a changed-file status to a change type name.
INITIAL_TO_CHANGE_TYPE = {
    'M': 'modify',
    'A': 'add',
    'D': 'delete',
    'C': 'copy',
    'R': 'rename'
}
| + |
| + |
class GitParser(object):
  """Base class for callables that parse the output of a git command."""

  def __call__(self, output):
    """Parses ``output``; every concrete parser must override this.

    Args:
      output: Text printed by the git command the parser understands.

    Raises:
      NotImplementedError: Always, because the base class parses nothing.
    """
    raise NotImplementedError()
| + |
| + |
class GitBlameParser(GitParser):
  """Parses output of 'git blame --porcelain <rev> <file_path>'.

  For example:
  Git blame output of a Region is:
  ed268bfed3205347a90557c5029f37e90cc01956 18 18 3
  author test@google.com
  author-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
  author-time 1363032816
  author-tz +0000
  committer test@google.com
  committer-mail <test@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
  committer-time 1363032816
  committer-tz +0000
  summary add (mac) test for ttcindex in SkFontStream
  previous fe7533eebe777cc66c7f8fa7a03f00572755c5b4 src/core/SkFontStream.h
  filename src/core/SkFontStream.h
       * Return the number of shared directories.
  ed268bfed3205347a90557c5029f37e90cc01956 19 19
       * if the stream is a normal sfnt (ttf). If there is an error or
  ed268bfed3205347a90557c5029f37e90cc01956 20 20
       * no directory is found, return 0.

  The author lines of a commit are cached by revision, so a later region of
  the same revision that carries no metadata of its own still gets its author
  information.
  """

  # Keys that start the per-commit metadata lines of the porcelain format.
  # A metadata line whose value happens to begin with three integers (e.g.
  # 'summary 2016 10 24 roll') also matches REGION_START_COUNT_PATTERN; such
  # a line must not be mistaken for the header of a new region.
  _METADATA_KEYS = frozenset([
      'author', 'author-mail', 'author-time', 'author-tz',
      'committer', 'committer-mail', 'committer-time', 'committer-tz',
      'summary', 'previous', 'filename', 'boundary',
  ])

  def __call__(self, output, path, revision):  # pylint:disable=W
    """Builds a Blame for ``path`` at ``revision`` from porcelain output.

    Args:
      output: Text printed by 'git blame --porcelain'.
      path: File path, passed to the Blame constructor.
      revision: Revision that was blamed, passed to the Blame constructor.

    Returns:
      A single Blame object with one Region per region header in ``output``.

    Raises:
      KeyError: If a region's revision has no author name, email or time in
        ``output``.
    """
    blame = Blame(revision, path)
    commit_info = defaultdict(dict)
    region_info = None
    for line in output.splitlines():
      # Sample: ec3ed6... 2 1 7.
      header = REGION_START_COUNT_PATTERN.match(line)
      if header and header.group(1) not in self._METADATA_KEYS:
        # A new region starts, so the previous one (if any) is complete.
        if region_info:
          blame.AddRegion(self._BuildRegion(region_info, commit_info))

        region_info = {'start': int(header.group(2)),
                       'count': int(header.group(3)),
                       'revision': header.group(1)}
      elif region_info:
        self._UpdateCommitInfo(line, commit_info[region_info['revision']])

    # The last region has no following header to trigger its flush.
    if region_info:
      blame.AddRegion(self._BuildRegion(region_info, commit_info))

    return blame

  @staticmethod
  def _BuildRegion(region_info, commit_info):
    """Creates the Region for ``region_info`` using the cached author data."""
    revision = region_info['revision']
    commit = commit_info[revision]
    return Region(region_info['start'],
                  region_info['count'],
                  revision,
                  commit['author_name'],
                  commit['author_email'],
                  commit['author_time'])

  @staticmethod
  def _UpdateCommitInfo(line, commit):
    """Stores the author field carried by ``line``, if any, into ``commit``."""
    # Sample: author test@google.com.
    match = AUTHOR_NAME_PATTERN.match(line)
    if match:
      commit['author_name'] = match.group(1)
      return

    # Sample: author-mail <test@google.com@2eff-a529-9590-31e7-b00076f81>.
    match = AUTHOR_MAIL_PATTERN.match(line)
    if match:
      commit['author_email'] = repo_util.NormalizeEmail(
          match.group(1).replace('<', '').replace('>', ''))
      return

    # Sample: author-time 1311863160.
    match = AUTHOR_TIME_PATTERN.match(line)
    if match:
      # Kept as the raw string until the 'author-tz' line is seen.
      commit['author_time'] = match.group(1)
      return

    # Sample: author-tz +0800.
    match = AUTHOR_TIMEZONE_PATTERN.match(line)
    if match:
      time_zone = time_util.TimeZoneInfo(match.group(1))
      # NOTE(review): datetime.fromtimestamp() yields a naive datetime in the
      # local time zone of the running process, not in the author's zone;
      # confirm this is what TimeZoneInfo.LocalToUTC expects.
      commit['author_time'] = time_zone.LocalToUTC(
          datetime.fromtimestamp(int(commit['author_time'])))
| + |
| + |
def GetChangeType(initial):
  """Gets the change type based on the initial character of a file status.

  Args:
    initial: Status string of a changed file, e.g. 'M' or 'R078'.

  Returns:
    The matching value of INITIAL_TO_CHANGE_TYPE, or None when ``initial`` is
    empty or its first character is not a key of that mapping.
  """
  # An empty status has no first character to look up.
  if not initial:
    return None
  return INITIAL_TO_CHANGE_TYPE.get(initial[0])
| + |
| + |
def GetFileChangeInfo(change_type, path1, path2):
  """Builds the old/new path record of one changed file.

  Args:
    change_type: Change type name, compared case-insensitively against
      'modify', 'add', 'delete', 'rename' and 'copy'. May be None.
    path1: The only path of a modify/add/delete, or the old path of a
      rename/copy.
    path2: The new path of a rename/copy; ignored for other change types.

  Returns:
    A dict with keys 'change_type' (as passed in), 'old_path' and 'new_path',
    or None when ``change_type`` is empty or not one of the known types.
  """
  # Callers pass None for a status they could not map to a change type.
  if not change_type:
    return None

  kind = change_type.lower()
  if kind == 'modify':
    old_path, new_path = path1, path1
  elif kind == 'add':
    # Stay the same as gitiles: an added file has no old path.
    old_path, new_path = None, path1
  elif kind == 'delete':
    old_path, new_path = path1, None
  elif kind in ('rename', 'copy'):
    old_path, new_path = path1, path2
  else:
    return None

  return {
      'change_type': change_type,
      'old_path': old_path,
      'new_path': new_path
  }
| + |
| + |
class GitChangeLogParser(GitParser):
  """Parses the text of a single changelog into a ChangeLog."""

  def __call__(self, output, repo_url):  # pylint:disable=W
    """Parses output of 'git log --pretty=format:<format>.

    For example:
    Git changelog output is:
    commit 21a8979218c096f4a96b07b67c9531f5f09e28a3
    tree 7d9a79c9b060c9a030abe20a8429d2b81ca1d4db
    parents 9640406d426a2d153b16e1d9ae7f9105268b36c9

    author Test
    author-mail test@google.com
    author-time 2016-10-24 22:21:45

    committer Test
    committer-mail test@google.com
    committer-time 2016-10-24 22:25:45

    --Message start--
    Commit messages...
    --Message end--

    :100644 100644 25f95f c766f1 M src/a/delta/git_parsers.py

    Args:
      output: Text of one changelog in the format shown above.
      repo_url: Url of the repository, used to build the commit url.

    Returns:
      Parsed ChangeLog object, or None if ``output`` has no 'commit' line.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    def _ParseTime(value):
      return datetime.strptime(value, DATETIME_FORMAT)

    # (pattern, key in info, converter) tried in order; the first pattern
    # that matches a line consumes it.
    field_parsers = (
        (COMMIT_HASH_PATTERN, 'revision', None),
        (AUTHOR_NAME_PATTERN, 'author_name', None),
        (AUTHOR_MAIL_PATTERN, 'author_email', repo_util.NormalizeEmail),
        (AUTHOR_TIME_PATTERN, 'author_time', _ParseTime),
        (COMMITTER_NAME_PATTERN, 'committer_name', None),
        (COMMITTER_MAIL_PATTERN, 'committer_email', repo_util.NormalizeEmail),
        (COMMITTER_TIME_PATTERN, 'committer_time', _ParseTime),
    )

    is_message_line = False
    info = {'message': '', 'touched_files': []}
    for line in output.splitlines():
      if MESSAGE_START_PATTERN.match(line):
        is_message_line = True
        continue

      if MESSAGE_END_PATTERN.match(line):
        is_message_line = False
        # Remove the added '\n' at the end.
        info['message'] = info['message'][:-1]
        continue

      if is_message_line:
        info['message'] += line + '\n'
        continue

      for pattern, key, convert in field_parsers:
        match = pattern.match(line)
        if match:
          value = match.group(1)
          info[key] = convert(value) if convert else value
          break
      else:
        # For modify, add, delete, the pattern is like:
        # :100644 100644 df565d 6593e M modules/audio_coding/BUILD.gn
        # For rename, copy, the pattern is like:
        # :100644 100644 3f2e 20a5 R078 path1 path2
        match = (CHANGED_FILE_PATTERN1.match(line) or
                 CHANGED_FILE_PATTERN2.match(line))
        if not match:
          continue

        groups = match.groups()
        status = groups[4]
        change_type = GetChangeType(status) if status else None
        if change_type is None:
          # Git can report statuses that have no change type here (e.g. 'T'
          # for a file type change); skip the entry instead of crashing the
          # whole parse.
          logging.warning('Skipping file with unrecognized status: %s', line)
          continue

        # Only the rename/copy pattern has a seventh group (the new path).
        new_path = groups[6] if len(groups) > 6 else None
        info['touched_files'].append(
            GetFileChangeInfo(change_type, groups[5], new_path))

    # Without a 'commit' line there is no changelog to build.
    if 'revision' not in info:
      return None

    info['commit_position'], info['code_review_url'] = (
        repo_util.ExtractCommitPositionAndCodeReviewUrl(info['message']))
    info['reverted_revision'] = repo_util.GetRevertedRevision(info['message'])
    info['commit_url'] = '%s/+/%s' % (repo_url, info['revision'])

    return ChangeLog.FromDict(info)
| + |
| + |
class GitChangeLogsParser(GitParser):
  """Parses a sequence of changelogs separated by a start marker."""

  def __call__(self, output, repo_url):  # pylint:disable=W
    """Parses output of 'git log --pretty=format:<format> s_rev..e_rev'.

    For example:
    The output is:
    **Changelog start**
    commit 9af040a364c15bdc2adeea794e173a2c529a3ddc
    tree 27b0421273ed4aea25e497c6d26d9c7db6481852
    parents c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e

    author author1
    author-mail author1@chromium.org
    author-time 2016-10-24 22:21:45

    committer Commit bot
    committer-mail commit-bot@chromium.org
    committer-time 2016-10-24 22:23:45

    --Message start--
    Message 1
    --Message end--

    :100644 100644 28e117 f12d3 M tools/win32.txt


    **Changelog start**
    commit c39b0cc8a516de1fa57d032dc0135a4eadfe2c9e
    tree d22d3786e135b83183cfeba5f3d8913959f56299
    parents ac7ee4ce7b8d39b22a710c58d110e0039c11cf9a

    author author2
    author-mail author2@chromium.org
    author-time 2016-10-24 22:22:45

    committer Commit bot
    committer-mail commit-bot@chromium.org
    committer-time 2016-10-24 22:23:45

    --Message start--
    Message2
    --Message end--

    :100644 100644 7280f df186 M tools/perf/benchmarks/memory_infra.py

    Args:
      output: Text holding zero or more changelogs, each one introduced by a
        '**Changelog start**' line.
      repo_url: Url of the repository, forwarded to GitChangeLogParser.

    Returns:
      A list of parsed ChangeLog objects.
    """
    parse_single = GitChangeLogParser()

    parsed_logs = []
    pending_lines = []
    for line in output.splitlines():
      if not CHANGELOG_START_PATTERN.match(line):
        pending_lines.append(line + '\n')
        continue

      # A start marker closes the changelog collected so far, if there is one.
      if pending_lines:
        parsed = parse_single(''.join(pending_lines), repo_url)
        if parsed:
          parsed_logs.append(parsed)
        pending_lines = []

    # Whatever follows the last marker is the final changelog.
    parsed = parse_single(''.join(pending_lines), repo_url)
    if parsed:
      parsed_logs.append(parsed)

    return parsed_logs
| + |
| + |
class GitDiffParser(GitParser):
  """Pass-through parser for the textual diff of a commit."""

  def __call__(self, output):
    """Returns the raw text output of 'git log --format="" --max-count=1'.

    No parsing is done; the diff text is handed back unchanged.

    For example:
    The output is like:

    diff --git a/chrome/print_header.js b/chrome/print_header.js
    index 51f25e7..4eec37f 100644
    --- a/chrome/browser/resources/print_preview/print_header.js
    +++ b/chrome/browser/resources/print_preview/print_header.js
    @@ -188,20 +188,25 @@ cr.define('print_preview', function() {
         label = loadTimeData.getStringF('printPreviewSummaryFormatShort',
    -                                    numSheets, summaryLabel);
    +                                    numSheets.toLocaleString(),
    +                                    summaryLabel);
       }

    Args:
      output: Diff text printed by git.

    Returns:
      ``output`` itself, unmodified.
    """
    return output
| + |
| + |
class GitSourceParser(GitParser):
  """Pass-through parser for the content of a file at a revision."""

  def __call__(self, output):
    """Returns the raw text of a file source from 'git show <rev>:<file>'.

    Args:
      output: File content printed by git.

    Returns:
      ``output`` itself, unmodified.
    """
    return output