Chromium Code Reviews| Index: appengine/findit/common/local_git_repository.py |
| diff --git a/appengine/findit/common/local_git_repository.py b/appengine/findit/common/local_git_repository.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..24e40a690bf4000da420f02337edbe7e83d728a3 |
| --- /dev/null |
| +++ b/appengine/findit/common/local_git_repository.py |
| @@ -0,0 +1,133 @@ |
| +# Copyright 2014 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import os |
| +import subprocess |
| +import threading |
| + |
| +from common import local_git_parsers |
| +from common import repo_util |
| +from common.repository import Repository |
| + |
| +_CHANGELOG_FORMAT_STRING = ('commit %H%n' |
| + 'author %an%n' |
| + 'author-mail %ae%n' |
| + 'author-time %ad%n%n' |
| + 'committer %cn%n' |
| + 'committer-mail %ce%n' |
| + 'committer-time %cd%n%n' |
| + '--Message start--%n%B%n--Message end--%n') |
| +_CHANGELOGS_FORMAT_STRING = ('**Changelog start**%%n%s' % |
| + _CHANGELOG_FORMAT_STRING) |
| +CHECKOUT_ROOT_DIR = os.path.join(os.path.expanduser('~'), '.local_checkouts') |
| + |
| + |
| +class LocalGitRepository(Repository): |
| + """Represents a local checkout of a git repository on a chromium host. |
| + |
| + Note, to checkout internal repos automatically which you have access to, |
| + follow the instructions in ('https://g3doc.corp.google.com/company/teams/ |
| + chrome/chrome_build_instructions.md? |
| + cl=head#authentication-to-git-servers-chrome-internalgooglesourcecom') first. |
| + """ |
| + lock = threading.Lock() |
| + # Keep track of all the updated repos, so every repo only gets updated once. |
| + updated_repos = set() |
|
wrengr
2016/10/25 18:12:18
I'd think this should be private/protected. Would
Sharu Jiang
2016/10/26 01:31:15
no, it is neither written or read by other code. D
|
| + def __init__(self, repo_url=None): |
| + self._host = None |
| + self._repo_path = None |
| + self._repo_url = None |
| + self.repo_url = repo_url |
|
wrengr
2016/10/25 18:12:18
What's the difference between self._repo_url vs se
Sharu Jiang
2016/10/26 01:31:15
the self._repo_url set the initial default value,
|
| + |
| + @property |
| + def repo_path(self): |
| + return self._repo_path |
| + |
| + @property |
| + def real_repo_path(self): |
|
wrengr
2016/10/25 18:12:18
Needs docstring explaining what "real" means here.
Sharu Jiang
2016/10/26 01:31:15
Done.
|
| + return os.path.join(CHECKOUT_ROOT_DIR, self._host, self.repo_path) |
| + |
| + @property |
| + def repo_url(self): |
|
wrengr
2016/10/25 18:12:18
Needs docstring explaining how this differs from t
Sharu Jiang
2016/10/26 01:31:15
Done.
|
| + return self._repo_url |
| + |
| + @repo_url.setter |
| + def repo_url(self, repo_url): |
| + if not repo_url or self._repo_url == repo_url: |
| + return |
| + |
| + self._repo_url = repo_url |
| + url_parts = repo_url.split('https://', 1)[1].split('/') |
|
wrengr
2016/10/25 18:12:18
This regex seems very fragile. Don't we have a lib
Sharu Jiang
2016/10/26 01:31:15
Done.
|
| + |
| + self._host = url_parts[0] |
| + self._repo_path = '/'.join(url_parts[1:]) |
|
wrengr
2016/10/25 18:12:18
Calling join after split is expensive. If sticking
Sharu Jiang
2016/10/26 01:31:15
Done.
|
| + self._CloneOrUpdateRepoIfNeeded() |
| + |
| + def _CloneOrUpdateRepoIfNeeded(self): |
| + """Clones the repo, or updates it if it has not been updated before.""" |
| + if self.repo_url in LocalGitRepository.updated_repos: |
| + return |
| + |
| + real_repo_path = os.path.join(CHECKOUT_ROOT_DIR, self.repo_path) |
| + with LocalGitRepository.lock: |
| + # Clone the repo if needed. |
| + if not os.path.exists(real_repo_path): |
| + subprocess.call(['git', 'clone', self.repo_url, real_repo_path]) |
|
wrengr
2016/10/25 18:12:18
We should check the return value to make sure thin
Sharu Jiang
2016/10/26 01:31:15
Done.
|
| + # Update repo if it's already cloned. |
| + else: |
| + # Suppress the output of git pull. |
| + with open(os.devnull, 'w') as null_handle: |
| + subprocess.check_call( |
|
wrengr
2016/10/25 18:12:18
Ditto. Especially given as we're silencing stderr.
Sharu Jiang
2016/10/26 01:31:15
Done.
|
| + 'cd %s; git pull' % real_repo_path, |
| + stdout=null_handle, |
| + stderr=null_handle, |
| + shell=True) |
| + |
| + LocalGitRepository.updated_repos.add(self.repo_url) |
| + |
| + def _GetFinalCommand(self, command): |
| + # Convert local time to UTC time. |
| + command = 'TZ=UTC %s --date=format-local:"%s"' % ( |
|
stgao
2016/10/25 23:04:23
Should TZ=UTC be set through env? Does this work a
Sharu Jiang
2016/10/26 01:31:15
Do we want to use UTC everywhere? I think it may b
|
| + command, local_git_parsers.DATETIME_FORMAT) |
| + return 'cd %s; %s' % (self.real_repo_path, command) |
|
wrengr
2016/10/25 18:12:18
Should be 'cd %s && %s'. If the cd fails for some
Sharu Jiang
2016/10/26 01:31:15
Done. Good to know this usage :)
|
| + |
| + def GetChangeLog(self, revision): |
| + """Returns the change log of the given revision.""" |
| + command = ('git log --pretty=format:"%s" --max-count=1 --raw ' |
| + '--no-abbrev %s' % (_CHANGELOG_FORMAT_STRING, revision)) |
| + output = repo_util.GetCommandOutput(self._GetFinalCommand(command)) |
| + change_log = local_git_parsers.GitChangeLogParser()(output, self.repo_url) |
| + return change_log |
| + |
| + def GetChangeLogs(self, start_revision, end_revision): # pylint: disable=W |
| + """Returns change log list in (start_revision, end_revision].""" |
| + command = ('git log --pretty=format:"%s" --raw ' |
| + '--no-abbrev %s' % (_CHANGELOGS_FORMAT_STRING, |
| + '%s..%s' % (start_revision, end_revision))) |
| + output = repo_util.GetCommandOutput(self._GetFinalCommand(command)) |
| + return local_git_parsers.GitChangeLogsParser()(output, self.repo_url) |
| + |
| + def GetChangeDiff(self, revision, path=None): # pylint: disable=W |
| + """Returns the diff of the given revision.""" |
| + command = 'git log --format="" --max-count=1 %s' % revision |
| + if path: |
| + command += ' -p %s' % path |
| + output = repo_util.GetCommandOutput(self._GetFinalCommand(command)) |
| + return local_git_parsers.GitDiffParser()(output) |
| + |
| + def GetBlame(self, path, revision): |
| + """Returns blame of the file at ``path`` of the given revision.""" |
|
wrengr
2016/10/25 18:12:18
Is our style to use double backticks when referrin
stgao
2016/10/25 23:04:23
In infra, double backticks is the way to go.
Sharu Jiang
2016/10/26 01:31:15
Acknowledged.
|
| + command = 'git blame --porcelain %s %s' % (path, revision) |
| + output = repo_util.GetCommandOutput(self._GetFinalCommand(command)) |
| + return local_git_parsers.GitBlameParser()(output, path, revision) |
| + |
| + def GetSource(self, path, revision): |
| + """Returns source code of the file at ``path`` of the given revision.""" |
| + # Check whether the requested file exists or not. |
| + if not os.path.isfile(os.path.join(self.real_repo_path, path)): |
| + return None |
| + |
| + command = 'git show %s:%s' % (revision, path) |
| + output = repo_util.GetCommandOutput(self._GetFinalCommand(command)) |
| + return local_git_parsers.GitSourceParser()(output) |