| Index: appengine/findit/common/git_repository.py
 | 
| diff --git a/appengine/findit/common/git_repository.py b/appengine/findit/common/git_repository.py
 | 
| deleted file mode 100644
 | 
| index 9d49da023fb1b42dfb78e7dc4f2db334005ddb61..0000000000000000000000000000000000000000
 | 
| --- a/appengine/findit/common/git_repository.py
 | 
| +++ /dev/null
 | 
| @@ -1,291 +0,0 @@
 | 
| -# Copyright 2014 The Chromium Authors. All rights reserved.
 | 
| -# Use of this source code is governed by a BSD-style license that can be
 | 
| -# found in the LICENSE file.
 | 
| -
 | 
| -import base64
 | 
| -from datetime import datetime
 | 
| -from datetime import timedelta
 | 
| -import json
 | 
| -import re
 | 
| -
 | 
| -from common import diff
 | 
| -from common.blame import Blame
 | 
| -from common.blame import Region
 | 
| -from common.cache_decorator import Cached
 | 
| -from common.cache_decorator import CompressedMemCacher
 | 
| -from common.change_log import ChangeLog
 | 
| -from common.change_log import FileChangeInfo
 | 
| -from common.repository import Repository
 | 
| -
 | 
| -
 | 
| -COMMIT_POSITION_PATTERN = re.compile(
 | 
| -    '^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)
 | 
| -CODE_REVIEW_URL_PATTERN = re.compile(
 | 
| -    '^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)
 | 
| -REVERTED_REVISION_PATTERN = re.compile(
 | 
| -    '^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)
 | 
| -TIMEZONE_PATTERN = re.compile('[-+]\d{4}$')
 | 
| -CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
 | 
| -
 | 
| -
 | 
| -class GitRepository(Repository):
 | 
| -  """Represents a git repository on https://chromium.googlesource.com."""
 | 
| -
 | 
| -  def __init__(self, repo_url=None, http_client=None):
 | 
| -    super(GitRepository, self).__init__()
 | 
| -    if repo_url and repo_url.endswith('/'):
 | 
| -      self._repo_url = repo_url[:-1]
 | 
| -    else:
 | 
| -      self._repo_url = repo_url
 | 
| -
 | 
| -    self._http_client = http_client
 | 
| -
 | 
| -  @property
 | 
| -  def repo_url(self):
 | 
| -    return self._repo_url
 | 
| -
 | 
| -  @repo_url.setter
 | 
| -  def repo_url(self, repo_url):
 | 
| -    self._repo_url = repo_url
 | 
| -
 | 
| -  @property
 | 
| -  def http_client(self):
 | 
| -    return self._http_client
 | 
| -
 | 
| -  @property
 | 
| -  def identifier(self):
 | 
| -    return self.repo_url
 | 
| -
 | 
| -  @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
 | 
| -          cacher=CompressedMemCacher())
 | 
| -  def _SendRequestForJsonResponse(self, url, params=None):
 | 
| -    if params is None:  # pragma: no cover
 | 
| -      params = {}
 | 
| -    params['format'] = 'json'
 | 
| -
 | 
| -    # Gerrit prepends )]}' to json-formatted response.
 | 
| -    prefix = ')]}\'\n'
 | 
| -
 | 
| -    status_code, content = self.http_client.Get(url, params)
 | 
| -    if status_code != 200:
 | 
| -      return None
 | 
| -    elif not content or not content.startswith(prefix):
 | 
| -      raise Exception('Response does not begin with %s' % prefix)
 | 
| -
 | 
| -    return json.loads(content[len(prefix):])
 | 
| -
 | 
| -  @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
 | 
| -  def _SendRequestForTextResponse(self, url):
 | 
| -    status_code, content = self.http_client.Get(url, {'format': 'text'})
 | 
| -    if status_code != 200:
 | 
| -      return None
 | 
| -    return base64.b64decode(content)
 | 
| -
 | 
| -  def ExtractCommitPositionAndCodeReviewUrl(self, message):
 | 
| -    """Returns the commit position and code review url in the commit message.
 | 
| -
 | 
| -    A "commit position" is something similar to SVN version ids; i.e.,
 | 
| -    numeric identifiers which are issued in sequential order. The reason
 | 
| -    we care about them is that they're easier for humans to read than
 | 
| -    the hashes that Git uses internally for identifying commits. We
 | 
| -    should never actually use them for *identifying* commits; they're
 | 
| -    only for pretty printing to humans.
 | 
| -
 | 
| -    Returns:
 | 
| -      (commit_position, code_review_url)
 | 
| -    """
 | 
| -    if not message:
 | 
| -      return (None, None)
 | 
| -
 | 
| -    commit_position = None
 | 
| -    code_review_url = None
 | 
| -
 | 
| -    # Commit position and code review url are in the last 5 lines.
 | 
| -    lines = message.strip().split('\n')[-5:]
 | 
| -    lines.reverse()
 | 
| -
 | 
| -    for line in lines:
 | 
| -      if commit_position is None:
 | 
| -        match = COMMIT_POSITION_PATTERN.match(line)
 | 
| -        if match:
 | 
| -          commit_position = int(match.group(1))
 | 
| -
 | 
| -      if code_review_url is None:
 | 
| -        match = CODE_REVIEW_URL_PATTERN.match(line)
 | 
| -        if match:
 | 
| -          code_review_url = match.group(1)
 | 
| -    return (commit_position, code_review_url)
 | 
| -
 | 
| -  def _NormalizeEmail(self, email):
 | 
| -    """Normalizes the email from git repo.
 | 
| -
 | 
| -    Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
 | 
| -    """
 | 
| -    parts = email.split('@')
 | 
| -    return '@'.join(parts[0:2])
 | 
| -
 | 
| -  def _GetDateTimeFromString(self, datetime_string,
 | 
| -                             date_format='%a %b %d %H:%M:%S %Y'):
 | 
| -    if TIMEZONE_PATTERN.findall(datetime_string):
 | 
| -      # Need to handle timezone conversion.
 | 
| -      naive_datetime_str, _, offset_str = datetime_string.rpartition(' ')
 | 
| -      naive_datetime = datetime.strptime(naive_datetime_str,
 | 
| -                                         date_format)
 | 
| -      hour_offset = int(offset_str[-4:-2])
 | 
| -      minute_offset = int(offset_str[-2:])
 | 
| -      if(offset_str[0]) == '-':
 | 
| -        hour_offset = -hour_offset
 | 
| -        minute_offset = -minute_offset
 | 
| -
 | 
| -      time_delta = timedelta(hours=hour_offset, minutes=minute_offset)
 | 
| -
 | 
| -      utc_datetime = naive_datetime - time_delta
 | 
| -      return utc_datetime
 | 
| -
 | 
| -    return datetime.strptime(datetime_string, date_format)
 | 
| -
 | 
| -  def _DownloadChangeLogData(self, revision):
 | 
| -    url = '%s/+/%s' % (self.repo_url, revision)
 | 
| -    return url, self._SendRequestForJsonResponse(url)
 | 
| -
 | 
| -  def GetRevertedRevision(self, message):
 | 
| -    """Parse message to get the reverted revision if there is one."""
 | 
| -    lines = message.strip().splitlines()
 | 
| -    if not lines[0].lower().startswith('revert'):
 | 
| -      return None
 | 
| -
 | 
| -    for line in reversed(lines):  # pragma: no cover
 | 
| -      # TODO: Handle cases where no reverted_revision in reverting message.
 | 
| -      reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
 | 
| -      if reverted_revision_match:
 | 
| -        return reverted_revision_match.group(1)
 | 
| -
 | 
| -  def _ParseChangeLogFromLogData(self, data):
 | 
| -    commit_position, code_review_url = (
 | 
| -        self.ExtractCommitPositionAndCodeReviewUrl(data['message']))
 | 
| -
 | 
| -    touched_files = []
 | 
| -    for file_diff in data['tree_diff']:
 | 
| -      change_type = file_diff['type'].lower()
 | 
| -      if not diff.IsKnownChangeType(change_type):
 | 
| -        raise Exception('Unknown change type "%s"' % change_type)
 | 
| -      touched_files.append(
 | 
| -          FileChangeInfo(
 | 
| -              change_type, file_diff['old_path'], file_diff['new_path']))
 | 
| -
 | 
| -    author_time = self._GetDateTimeFromString(data['author']['time'])
 | 
| -    committer_time = self._GetDateTimeFromString(data['committer']['time'])
 | 
| -    reverted_revision = self.GetRevertedRevision(data['message'])
 | 
| -    url = '%s/+/%s' % (self.repo_url, data['commit'])
 | 
| -
 | 
| -    return ChangeLog(
 | 
| -        data['author']['name'], self._NormalizeEmail(data['author']['email']),
 | 
| -        author_time,
 | 
| -        data['committer']['name'],
 | 
| -        self._NormalizeEmail(data['committer']['email']),
 | 
| -        committer_time, data['commit'], commit_position,
 | 
| -        data['message'], touched_files, url, code_review_url,
 | 
| -        reverted_revision)
 | 
| -
 | 
| -  def GetChangeLog(self, revision):
 | 
| -    """Returns the change log of the given revision."""
 | 
| -    _, data = self._DownloadChangeLogData(revision)
 | 
| -    if not data:
 | 
| -      return None
 | 
| -
 | 
| -    return self._ParseChangeLogFromLogData(data)
 | 
| -
 | 
| -  def GetCommitsBetweenRevisions(self, start_revision, end_revision, n=1000):
 | 
| -    """Gets a list of commit hashes between start_revision and end_revision.
 | 
| -
 | 
| -    Args:
 | 
| -      start_revision: The oldest revision in the range.
 | 
| -      end_revision: The latest revision in the range.
 | 
| -      n: The maximum number of revisions to request at a time.
 | 
| -
 | 
| -    Returns:
 | 
| -      A list of commit hashes made since start_revision through and including
 | 
| -      end_revision in order from most-recent to least-recent. This includes
 | 
| -      end_revision, but not start_revision.
 | 
| -    """
 | 
| -    params = {'n': n}
 | 
| -    next_end_revision = end_revision
 | 
| -    commits = []
 | 
| -
 | 
| -    while next_end_revision:
 | 
| -      url = '%s/+log/%s..%s' % (
 | 
| -          self.repo_url, start_revision, next_end_revision)
 | 
| -      data = self._SendRequestForJsonResponse(url, params)
 | 
| -
 | 
| -      if not data:
 | 
| -        break
 | 
| -
 | 
| -      for log in data.get('log', []):
 | 
| -        commit = log.get('commit')
 | 
| -        if commit:
 | 
| -          commits.append(commit)
 | 
| -
 | 
| -      next_end_revision = data.get('next')
 | 
| -
 | 
| -    return commits
 | 
| -
 | 
| -  def GetChangeDiff(self, revision):
 | 
| -    """Returns the raw diff of the given revision."""
 | 
| -    url = '%s/+/%s%%5E%%21/' % (self.repo_url, revision)
 | 
| -    return self._SendRequestForTextResponse(url)
 | 
| -
 | 
| -  def GetBlame(self, path, revision):
 | 
| -    """Returns blame of the file at ``path`` of the given revision."""
 | 
| -    url = '%s/+blame/%s/%s' % (self.repo_url, revision, path)
 | 
| -
 | 
| -    data = self._SendRequestForJsonResponse(url)
 | 
| -    if not data:
 | 
| -      return None
 | 
| -
 | 
| -    blame = Blame(revision, path)
 | 
| -    for region in data['regions']:
 | 
| -      author_time = self._GetDateTimeFromString(
 | 
| -          region['author']['time'], '%Y-%m-%d %H:%M:%S')
 | 
| -
 | 
| -      blame.AddRegion(
 | 
| -          Region(region['start'], region['count'], region['commit'],
 | 
| -                 region['author']['name'],
 | 
| -                 self._NormalizeEmail(region['author']['email']), author_time))
 | 
| -
 | 
| -    return blame
 | 
| -
 | 
| -  def GetSource(self, path, revision):
 | 
| -    """Returns source code of the file at ``path`` of the given revision."""
 | 
| -    url = '%s/+/%s/%s' % (self.repo_url, revision, path)
 | 
| -    return self._SendRequestForTextResponse(url)
 | 
| -
 | 
| -  def GetChangeLogs(self, start_revision, end_revision, n=1000):
 | 
| -    """Gets a list of ChangeLogs in revision range by batch.
 | 
| -
 | 
| -    Args:
 | 
| -      start_revision (str): The oldest revision in the range.
 | 
| -      end_revision (str): The latest revision in the range.
 | 
| -      n (int): The maximum number of revisions to request at a time (default
 | 
| -        to 1000).
 | 
| -
 | 
| -    Returns:
 | 
| -      A list of changelogs in (start_revision, end_revision].
 | 
| -    """
 | 
| -    next_end_revision = end_revision
 | 
| -    changelogs = []
 | 
| -
 | 
| -    while next_end_revision:
 | 
| -      url = '%s/+log/%s..%s' % (self.repo_url,
 | 
| -                                start_revision, next_end_revision)
 | 
| -      data = self._SendRequestForJsonResponse(url, params={'n': str(n),
 | 
| -                                                           'name-status': '1'})
 | 
| -
 | 
| -      for log in data['log']:
 | 
| -        changelogs.append(self._ParseChangeLogFromLogData(log))
 | 
| -
 | 
| -      if 'next' in data:
 | 
| -        next_end_revision = data['next']
 | 
| -      else:
 | 
| -        next_end_revision = None
 | 
| -
 | 
| -    return changelogs
 | 
| 
 |