| OLD | NEW |
| (Empty) |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import base64 | |
| 6 from datetime import datetime | |
| 7 from datetime import timedelta | |
| 8 import json | |
| 9 import re | |
| 10 | |
| 11 from common import diff | |
| 12 from common.blame import Blame | |
| 13 from common.blame import Region | |
| 14 from common.cache_decorator import Cached | |
| 15 from common.cache_decorator import CompressedMemCacher | |
| 16 from common.change_log import ChangeLog | |
| 17 from common.change_log import FileChangeInfo | |
| 18 from common.repository import Repository | |
| 19 | |
| 20 | |
# NOTE: All patterns are raw strings so that regex escapes such as ``\d`` are
# not interpreted (and warned about) as string escape sequences.

# Matches the "Cr-Commit-Position" footer for the master branch and captures
# the numeric commit position.
COMMIT_POSITION_PATTERN = re.compile(
    r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)

# Matches a "Review URL:" / "Review-Url:" footer and captures everything up to
# and including the last digit on the line (trailing text is dropped).
CODE_REVIEW_URL_PATTERN = re.compile(
    r'^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)

# Matches the quoted "> Committed: https://.../<40 hex chars>" line of a
# message and captures the 40-character hash.
REVERTED_REVISION_PATTERN = re.compile(
    r'^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)

# Matches a trailing timezone offset such as "+0800" or "-0730".
TIMEZONE_PATTERN = re.compile(r'[-+]\d{4}$')

# Expiration time of cached responses: one day, in seconds.
CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
| 29 | |
| 30 | |
class GitRepository(Repository):
  """Represents a git repository on https://chromium.googlesource.com.

  All data is fetched through ``http_client.Get(url, params)``, which must
  return a ``(status_code, content)`` pair.
  """

  def __init__(self, repo_url, http_client):
    """Initializes the repository.

    Args:
      repo_url (str): Url of the repository. One trailing '/' is dropped.
      http_client: Object providing ``Get(url, params)`` that returns
          ``(status_code, content)``.
    """
    super(GitRepository, self).__init__()
    self.repo_url = repo_url[:-1] if repo_url.endswith('/') else repo_url
    self.http_client = http_client

  @property
  def identifier(self):
    """Returns the (normalized) repository url."""
    return self.repo_url

  @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
          cacher=CompressedMemCacher())
  def _SendRequestForJsonResponse(self, url, params=None):
    """Requests ``url`` in json format and returns the decoded payload.

    Args:
      url (str): The url to request.
      params (dict): Optional query parameters. The dict is not modified.

    Returns:
      The decoded json data, or None if the status code is not 200.

    Raises:
      Exception: The response is empty or lacks the expected prefix.
    """
    # Work on a copy so that the caller's dict is never mutated.
    params = dict(params) if params else {}
    params['format'] = 'json'

    # Gerrit prepends )]}' to json-formatted response.
    prefix = ')]}\'\n'

    status_code, content = self.http_client.Get(url, params)
    if status_code != 200:
      return None
    if not content or not content.startswith(prefix):
      raise Exception('Response does not begin with %s' % prefix)

    return json.loads(content[len(prefix):])

  @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
  def _SendRequestForTextResponse(self, url):
    """Requests ``url`` in text format and returns the decoded content.

    The response body is base64-decoded before it is returned.

    Returns:
      The decoded content, or None if the status code is not 200.
    """
    status_code, content = self.http_client.Get(url, {'format': 'text'})
    if status_code != 200:
      return None
    return base64.b64decode(content)

  def ExtractCommitPositionAndCodeReviewUrl(self, message):
    """Returns the commit position and code review url in the commit message.

    A "commit position" is something similar to SVN version ids; i.e.,
    numeric identifiers which are issued in sequential order. The reason
    we care about them is that they're easier for humans to read than
    the hashes that Git uses internally for identifying commits. They do
    not have quite the same strong guarantees as SVN version ids (e.g.,
    it is possible for two different commits to end up with the same
    commit position under some circumstances). Thus, we should never
    actually use them for *identifying* commits; they're only for pretty
    printing to humans.

    Args:
      message (str): The commit message; may be empty or None.

    Returns:
      (commit_position, code_review_url). Either item is None if it is not
      found in the last 5 lines of the message.
    """
    if not message:
      return (None, None)

    commit_position = None
    code_review_url = None

    # Commit position and code review url are in the last 5 lines. Scan from
    # the bottom so that the match closest to the end of the message wins.
    for line in reversed(message.strip().split('\n')[-5:]):
      if commit_position is None:
        match = COMMIT_POSITION_PATTERN.match(line)
        if match:
          commit_position = int(match.group(1))

      if code_review_url is None:
        match = CODE_REVIEW_URL_PATTERN.match(line)
        if match:
          code_review_url = match.group(1)

    return (commit_position, code_review_url)

  def _NormalizeEmail(self, email):
    """Normalizes the email from git repo.

    Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
    Only the parts around the first '@' are kept.
    """
    return '@'.join(email.split('@')[:2])

  def _GetDateTimeFromString(self, datetime_string,
                             date_format='%a %b %d %H:%M:%S %Y'):
    """Parses ``datetime_string`` into a naive datetime.

    Args:
      datetime_string (str): The time string, optionally ending with a
          timezone offset like "+0800" or "-0730".
      date_format (str): ``strptime`` format of the part before the offset.

    Returns:
      A naive datetime. If an offset is present, it is subtracted so that the
      result is in UTC; otherwise the parsed time is returned as is.
    """
    offset_match = TIMEZONE_PATTERN.search(datetime_string)
    if not offset_match:
      return datetime.strptime(datetime_string, date_format)

    # Need to handle timezone conversion.
    offset_str = offset_match.group(0)
    naive_datetime = datetime.strptime(
        datetime_string[:offset_match.start()].rstrip(), date_format)

    offset = timedelta(hours=int(offset_str[1:3]), minutes=int(offset_str[3:]))
    if offset_str[0] == '-':
      offset = -offset

    return naive_datetime - offset

  def _DownloadChangeLogData(self, revision):
    """Returns (url, json data) of the change log of the given revision."""
    url = '%s/+/%s' % (self.repo_url, revision)
    return url, self._SendRequestForJsonResponse(url)

  def GetRevertedRevision(self, message):
    """Parse message to get the reverted revision if there is one.

    Args:
      message (str): The commit message; may be empty or None.

    Returns:
      The reverted revision, or None if the first line of the message does not
      start with "revert" or no reverted revision is found.
    """
    lines = message.strip().splitlines() if message else []
    if not lines or not lines[0].lower().startswith('revert'):
      return None

    for line in reversed(lines):  # pragma: no cover
      # TODO: Handle cases where no reverted_revision in reverting message.
      reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
      if reverted_revision_match:
        return reverted_revision_match.group(1)

    return None

  def _ParseChangeLogFromLogData(self, data):
    """Builds a ChangeLog from the json data of one commit.

    Args:
      data (dict): Must provide the keys 'message', 'tree_diff', 'author',
          'committer' and 'commit'.

    Raises:
      Exception: A file in 'tree_diff' has an unknown change type.
    """
    message = data['message']
    commit_position, code_review_url = (
        self.ExtractCommitPositionAndCodeReviewUrl(message))

    touched_files = []
    for file_diff in data['tree_diff']:
      change_type = file_diff['type'].lower()
      if not diff.IsKnownChangeType(change_type):
        raise Exception('Unknown change type "%s"' % change_type)
      touched_files.append(
          FileChangeInfo(
              change_type, file_diff['old_path'], file_diff['new_path']))

    author = data['author']
    committer = data['committer']
    author_time = self._GetDateTimeFromString(author['time'])
    committer_time = self._GetDateTimeFromString(committer['time'])
    reverted_revision = self.GetRevertedRevision(message)
    url = '%s/+/%s' % (self.repo_url, data['commit'])

    return ChangeLog(
        author['name'], self._NormalizeEmail(author['email']),
        author_time,
        committer['name'],
        self._NormalizeEmail(committer['email']),
        committer_time, data['commit'], commit_position,
        message, touched_files, url, code_review_url,
        reverted_revision)

  def GetChangeLog(self, revision):
    """Returns the change log of the given revision, None if unavailable."""
    _, data = self._DownloadChangeLogData(revision)
    if not data:
      return None

    return self._ParseChangeLogFromLogData(data)

  def GetCommitsBetweenRevisions(self, start_revision, end_revision, n=1000):
    """Gets a list of commit hashes between start_revision and end_revision.

    Args:
      start_revision: The oldest revision in the range.
      end_revision: The latest revision in the range.
      n: The maximum number of revisions to request at a time.

    Returns:
      A list of commit hashes made since start_revision through and including
      end_revision in order from most-recent to least-recent. This includes
      end_revision, but not start_revision. The list is cut short if a request
      returns no data.
    """
    params = {'n': n}
    next_end_revision = end_revision
    commits = []

    while next_end_revision:
      url = '%s/+log/%s..%s' % (
          self.repo_url, start_revision, next_end_revision)
      data = self._SendRequestForJsonResponse(url, params)

      if not data:
        break

      for log in data.get('log', []):
        commit = log.get('commit')
        if commit:
          commits.append(commit)

      # 'next' is absent on the last page, which ends the loop.
      next_end_revision = data.get('next')

    return commits

  def GetChangeDiff(self, revision):
    """Returns the raw diff of the given revision."""
    # %5E%21 is the url-encoded form of "^!".
    url = '%s/+/%s%%5E%%21/' % (self.repo_url, revision)
    return self._SendRequestForTextResponse(url)

  def GetBlame(self, path, revision):
    """Returns blame of the file at ``path`` of the given revision.

    Returns:
      A Blame with one Region per entry of the response's 'regions', or None
      if no data is returned for the request.
    """
    url = '%s/+blame/%s/%s' % (self.repo_url, revision, path)

    data = self._SendRequestForJsonResponse(url)
    if not data:
      return None

    blame = Blame(revision, path)
    for region in data['regions']:
      author = region['author']
      # Blame responses use a different time format than commit logs.
      author_time = self._GetDateTimeFromString(
          author['time'], '%Y-%m-%d %H:%M:%S')

      blame.AddRegion(
          Region(region['start'], region['count'], region['commit'],
                 author['name'],
                 self._NormalizeEmail(author['email']), author_time))

    return blame

  def GetSource(self, path, revision):
    """Returns source code of the file at ``path`` of the given revision."""
    url = '%s/+/%s/%s' % (self.repo_url, revision, path)
    return self._SendRequestForTextResponse(url)

  def GetChangeLogs(self, start_revision, end_revision, n=1000):
    """Gets a list of ChangeLogs in revision range by batch.

    Args:
      start_revision (str): The oldest revision in the range.
      end_revision (str): The latest revision in the range.
      n (int): The maximum number of revisions to request at a time (default
          to 1000).

    Returns:
      A list of changelogs in (start_revision, end_revision]. The list is cut
      short if a request returns no data.
    """
    next_end_revision = end_revision
    changelogs = []

    while next_end_revision:
      url = '%s/+log/%s..%s' % (self.repo_url,
                                start_revision, next_end_revision)
      data = self._SendRequestForJsonResponse(url, params={'n': str(n),
                                                           'name-status': '1'})

      # A failed request yields None; stop paging instead of crashing, in line
      # with GetCommitsBetweenRevisions.
      if not data:
        break

      for log in data.get('log', []):
        changelogs.append(self._ParseChangeLogFromLogData(log))

      # 'next' is absent on the last page, which ends the loop.
      next_end_revision = data.get('next')

    return changelogs
| OLD | NEW |