Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1734)

Unified Diff: appengine/findit/common/git_repository.py

Issue 2344443005: [Findit] Factoring the gitiles (etc) stuff out into its own directory (Closed)
Patch Set: rebase-update Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: appengine/findit/common/git_repository.py
diff --git a/appengine/findit/common/git_repository.py b/appengine/findit/common/git_repository.py
deleted file mode 100644
index ff80fe11050c162f0341750012d5d681a3cb1a33..0000000000000000000000000000000000000000
--- a/appengine/findit/common/git_repository.py
+++ /dev/null
@@ -1,277 +0,0 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import base64
-from datetime import datetime
-from datetime import timedelta
-import json
-import re
-
-from common import diff
-from common.blame import Blame
-from common.blame import Region
-from common.cache_decorator import Cached
-from common.cache_decorator import CompressedMemCacher
-from common.change_log import ChangeLog
-from common.change_log import FileChangeInfo
-from common.repository import Repository
-
-
-COMMIT_POSITION_PATTERN = re.compile(
- '^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)
-CODE_REVIEW_URL_PATTERN = re.compile(
- '^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)
-REVERTED_REVISION_PATTERN = re.compile(
- '^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)
-TIMEZONE_PATTERN = re.compile('[-+]\d{4}$')
-CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
-
-
-class GitRepository(Repository):
- """Represents a git repository on https://chromium.googlesource.com."""
-
- def __init__(self, repo_url, http_client):
- super(GitRepository, self).__init__()
- self.repo_url = repo_url
- if self.repo_url.endswith('/'):
- self.repo_url = self.repo_url[:-1]
- self.http_client = http_client
-
- @property
- def identifier(self):
- return self.repo_url
-
- @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
- cacher=CompressedMemCacher())
- def _SendRequestForJsonResponse(self, url, params=None):
- if params is None: # pragma: no cover
- params = {}
- params['format'] = 'json'
-
- # Gerrit prepends )]}' to json-formatted response.
- prefix = ')]}\'\n'
-
- status_code, content = self.http_client.Get(url, params)
- if status_code != 200:
- return None
- elif not content or not content.startswith(prefix):
- raise Exception('Response does not begin with %s' % prefix)
-
- return json.loads(content[len(prefix):])
-
- @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
- def _SendRequestForTextResponse(self, url):
- status_code, content = self.http_client.Get(url, {'format': 'text'})
- if status_code != 200:
- return None
- return base64.b64decode(content)
-
- def ExtractCommitPositionAndCodeReviewUrl(self, message):
- """Returns the commit position and code review url in the commit message.
-
- A "commit position" is something similar to SVN version ids; i.e.,
- numeric identifiers which are issued in sequential order. The reason
- we care about them is that they're easier for humans to read than
- the hashes that Git uses internally for identifying commits. We
- should never actually use them for *identifying* commits; they're
- only for pretty printing to humans.
-
- Returns:
- (commit_position, code_review_url)
- """
- if not message:
- return (None, None)
-
- commit_position = None
- code_review_url = None
-
- # Commit position and code review url are in the last 5 lines.
- lines = message.strip().split('\n')[-5:]
- lines.reverse()
-
- for line in lines:
- if commit_position is None:
- match = COMMIT_POSITION_PATTERN.match(line)
- if match:
- commit_position = int(match.group(1))
-
- if code_review_url is None:
- match = CODE_REVIEW_URL_PATTERN.match(line)
- if match:
- code_review_url = match.group(1)
- return (commit_position, code_review_url)
-
- def _NormalizeEmail(self, email):
- """Normalizes the email from git repo.
-
- Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
- """
- parts = email.split('@')
- return '@'.join(parts[0:2])
-
- def _GetDateTimeFromString(self, datetime_string,
- date_format='%a %b %d %H:%M:%S %Y'):
- if TIMEZONE_PATTERN.findall(datetime_string):
- # Need to handle timezone conversion.
- naive_datetime_str, _, offset_str = datetime_string.rpartition(' ')
- naive_datetime = datetime.strptime(naive_datetime_str,
- date_format)
- hour_offset = int(offset_str[-4:-2])
- minute_offset = int(offset_str[-2:])
- if(offset_str[0]) == '-':
- hour_offset = -hour_offset
- minute_offset = -minute_offset
-
- time_delta = timedelta(hours=hour_offset, minutes=minute_offset)
-
- utc_datetime = naive_datetime - time_delta
- return utc_datetime
-
- return datetime.strptime(datetime_string, date_format)
-
- def _DownloadChangeLogData(self, revision):
- url = '%s/+/%s' % (self.repo_url, revision)
- return url, self._SendRequestForJsonResponse(url)
-
- def GetRevertedRevision(self, message):
- """Parse message to get the reverted revision if there is one."""
- lines = message.strip().splitlines()
- if not lines[0].lower().startswith('revert'):
- return None
-
- for line in reversed(lines): # pragma: no cover
- # TODO: Handle cases where no reverted_revision in reverting message.
- reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
- if reverted_revision_match:
- return reverted_revision_match.group(1)
-
- def _ParseChangeLogFromLogData(self, data):
- commit_position, code_review_url = (
- self.ExtractCommitPositionAndCodeReviewUrl(data['message']))
-
- touched_files = []
- for file_diff in data['tree_diff']:
- change_type = file_diff['type'].lower()
- if not diff.IsKnownChangeType(change_type):
- raise Exception('Unknown change type "%s"' % change_type)
- touched_files.append(
- FileChangeInfo(
- change_type, file_diff['old_path'], file_diff['new_path']))
-
- author_time = self._GetDateTimeFromString(data['author']['time'])
- committer_time = self._GetDateTimeFromString(data['committer']['time'])
- reverted_revision = self.GetRevertedRevision(data['message'])
- url = '%s/+/%s' % (self.repo_url, data['commit'])
-
- return ChangeLog(
- data['author']['name'], self._NormalizeEmail(data['author']['email']),
- author_time,
- data['committer']['name'],
- self._NormalizeEmail(data['committer']['email']),
- committer_time, data['commit'], commit_position,
- data['message'], touched_files, url, code_review_url,
- reverted_revision)
-
- def GetChangeLog(self, revision):
- """Returns the change log of the given revision."""
- _, data = self._DownloadChangeLogData(revision)
- if not data:
- return None
-
- return self._ParseChangeLogFromLogData(data)
-
- def GetCommitsBetweenRevisions(self, start_revision, end_revision, n=1000):
- """Gets a list of commit hashes between start_revision and end_revision.
-
- Args:
- start_revision: The oldest revision in the range.
- end_revision: The latest revision in the range.
- n: The maximum number of revisions to request at a time.
-
- Returns:
- A list of commit hashes made since start_revision through and including
- end_revision in order from most-recent to least-recent. This includes
- end_revision, but not start_revision.
- """
- params = {'n': n}
- next_end_revision = end_revision
- commits = []
-
- while next_end_revision:
- url = '%s/+log/%s..%s' % (
- self.repo_url, start_revision, next_end_revision)
- data = self._SendRequestForJsonResponse(url, params)
-
- if not data:
- break
-
- for log in data.get('log', []):
- commit = log.get('commit')
- if commit:
- commits.append(commit)
-
- next_end_revision = data.get('next')
-
- return commits
-
- def GetChangeDiff(self, revision):
- """Returns the raw diff of the given revision."""
- url = '%s/+/%s%%5E%%21/' % (self.repo_url, revision)
- return self._SendRequestForTextResponse(url)
-
- def GetBlame(self, path, revision):
- """Returns blame of the file at ``path`` of the given revision."""
- url = '%s/+blame/%s/%s' % (self.repo_url, revision, path)
-
- data = self._SendRequestForJsonResponse(url)
- if not data:
- return None
-
- blame = Blame(revision, path)
- for region in data['regions']:
- author_time = self._GetDateTimeFromString(
- region['author']['time'], '%Y-%m-%d %H:%M:%S')
-
- blame.AddRegion(
- Region(region['start'], region['count'], region['commit'],
- region['author']['name'],
- self._NormalizeEmail(region['author']['email']), author_time))
-
- return blame
-
- def GetSource(self, path, revision):
- """Returns source code of the file at ``path`` of the given revision."""
- url = '%s/+/%s/%s' % (self.repo_url, revision, path)
- return self._SendRequestForTextResponse(url)
-
- def GetChangeLogs(self, start_revision, end_revision, n=1000):
- """Gets a list of ChangeLogs in revision range by batch.
-
- Args:
- start_revision (str): The oldest revision in the range.
- end_revision (str): The latest revision in the range.
- n (int): The maximum number of revisions to request at a time (default
- to 1000).
-
- Returns:
- A list of changelogs in (start_revision, end_revision].
- """
- next_end_revision = end_revision
- changelogs = []
-
- while next_end_revision:
- url = '%s/+log/%s..%s' % (self.repo_url,
- start_revision, next_end_revision)
- data = self._SendRequestForJsonResponse(url, params={'n': str(n),
- 'name-status': '1'})
-
- for log in data['log']:
- changelogs.append(self._ParseChangeLogFromLogData(log))
-
- if 'next' in data:
- next_end_revision = data['next']
- else:
- next_end_revision = None
-
- return changelogs

Powered by Google App Engine
This is Rietveld 408576698