OLD | NEW |
| (Empty) |
1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import base64 | |
6 from datetime import datetime | |
7 from datetime import timedelta | |
8 import json | |
9 import re | |
10 | |
11 from common import diff | |
12 from common.blame import Blame | |
13 from common.blame import Region | |
14 from common.cache_decorator import Cached | |
15 from common.cache_decorator import CompressedMemCacher | |
16 from common.change_log import ChangeLog | |
17 from common.change_log import FileChangeInfo | |
18 from common.repository import Repository | |
19 | |
20 | |
# Matches the "Cr-Commit-Position" footer of a commit message and captures the
# numeric commit position. Raw strings are used for every pattern so that
# regex escapes such as ``\d`` are not treated as (invalid) string escapes.
COMMIT_POSITION_PATTERN = re.compile(
    r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)
# Matches the "Review URL"/"Review-Url" footer and captures the url up to and
# including its last run of digits.
CODE_REVIEW_URL_PATTERN = re.compile(
    r'^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)
# Matches the quoted "> Committed:" line of a revert message and captures the
# 40-character hexadecimal revision at the end of the url.
REVERTED_REVISION_PATTERN = re.compile(
    r'^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)
# Matches a trailing timezone offset such as "-0700" or "+0100".
TIMEZONE_PATTERN = re.compile(r'[-+]\d{4}$')
# Expiration time passed to the response caches: one day, in seconds.
CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
29 | |
30 | |
class GitRepository(Repository):
  """Represents a git repository on https://chromium.googlesource.com.

  All data is fetched over HTTP with the injected ``http_client``. JSON and
  text responses are cached through the ``Cached`` decorator.
  """

  def __init__(self, repo_url=None, http_client=None):
    """Initializes the repository.

    Args:
      repo_url (str): Url of the repository. A single trailing '/' is dropped
          so that request urls can be built as '<repo_url>/+...'.
      http_client: Object whose ``Get(url, params)`` returns a
          ``(status_code, content)`` pair.
    """
    super(GitRepository, self).__init__()
    self._repo_url = self._StripTrailingSlash(repo_url)
    self._http_client = http_client

  @staticmethod
  def _StripTrailingSlash(repo_url):
    """Returns ``repo_url`` without its trailing '/', if it has one."""
    if repo_url and repo_url.endswith('/'):
      return repo_url[:-1]
    return repo_url

  @property
  def repo_url(self):
    """The url of the repository, without a trailing '/'."""
    return self._repo_url

  @repo_url.setter
  def repo_url(self, repo_url):
    # Normalize exactly like the constructor does; otherwise urls built from
    # a value such as 'https://host/repo/' would contain '//+/'.
    self._repo_url = self._StripTrailingSlash(repo_url)

  @property
  def http_client(self):
    """The client used to send requests."""
    return self._http_client

  @property
  def identifier(self):
    """The identifier of this repository, which is its url."""
    return self.repo_url

  @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
          cacher=CompressedMemCacher())
  def _SendRequestForJsonResponse(self, url, params=None):
    """Sends a request for the json view of ``url`` and parses the response.

    Args:
      url (str): The url to request.
      params (dict): Extra query parameters. The dict is not modified.

    Returns:
      The decoded json data, or None if the status code is not 200.

    Raises:
      Exception: The response is empty or lacks the expected prefix.
    """
    # Work on a copy so that the caller's dict is never mutated.
    request_params = dict(params) if params else {}
    request_params['format'] = 'json'

    # Gerrit prepends )]}' to json-formatted response.
    prefix = ')]}\'\n'

    status_code, content = self.http_client.Get(url, request_params)
    if status_code != 200:
      return None
    elif not content or not content.startswith(prefix):
      raise Exception('Response does not begin with %s' % prefix)

    return json.loads(content[len(prefix):])

  @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
  def _SendRequestForTextResponse(self, url):
    """Sends a request for the text view of ``url``.

    Returns:
      The base64-decoded response content, or None if the status code is
      not 200.
    """
    status_code, content = self.http_client.Get(url, {'format': 'text'})
    if status_code != 200:
      return None
    return base64.b64decode(content)

  def ExtractCommitPositionAndCodeReviewUrl(self, message):
    """Returns the commit position and code review url in the commit message.

    A "commit position" is something similar to SVN version ids; i.e.,
    numeric identifiers which are issued in sequential order. The reason
    we care about them is that they're easier for humans to read than
    the hashes that Git uses internally for identifying commits. We
    should never actually use them for *identifying* commits; they're
    only for pretty printing to humans.

    Args:
      message (str): The full commit message; may be empty or None.

    Returns:
      (commit_position, code_review_url), where either item is None if it
      was not found.
    """
    if not message:
      return (None, None)

    commit_position = None
    code_review_url = None

    # Commit position and code review url are in the last 5 lines. They are
    # scanned bottom-up so that the match closest to the end wins.
    for line in reversed(message.strip().split('\n')[-5:]):
      if commit_position is None:
        match = COMMIT_POSITION_PATTERN.match(line)
        if match:
          commit_position = int(match.group(1))

      if code_review_url is None:
        match = CODE_REVIEW_URL_PATTERN.match(line)
        if match:
          code_review_url = match.group(1)
    return (commit_position, code_review_url)

  def _NormalizeEmail(self, email):
    """Normalizes the email from git repo.

    Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
    Everything from the second '@' on is dropped.
    """
    parts = email.split('@')
    return '@'.join(parts[0:2])

  def _GetDateTimeFromString(self, datetime_string,
                             date_format='%a %b %d %H:%M:%S %Y'):
    """Parses ``datetime_string`` into a naive datetime.

    Args:
      datetime_string (str): The time to parse, optionally ending with a
          space and a timezone offset like '-0700'.
      date_format (str): The strptime format of the part without the offset.

    Returns:
      A naive datetime. If an offset is present, it is subtracted so that
      the result is in UTC; otherwise the parsed time is returned as is.
    """
    if not TIMEZONE_PATTERN.search(datetime_string):
      return datetime.strptime(datetime_string, date_format)

    # Need to handle timezone conversion.
    naive_datetime_str, _, offset_str = datetime_string.rpartition(' ')
    naive_datetime = datetime.strptime(naive_datetime_str, date_format)

    hour_offset = int(offset_str[-4:-2])
    minute_offset = int(offset_str[-2:])
    if offset_str[0] == '-':
      hour_offset = -hour_offset
      minute_offset = -minute_offset

    # Local time = UTC + offset, hence UTC = local time - offset.
    return naive_datetime - timedelta(hours=hour_offset,
                                      minutes=minute_offset)

  def _DownloadChangeLogData(self, revision):
    """Returns (url, json data) of the commit view of ``revision``."""
    url = '%s/+/%s' % (self.repo_url, revision)
    return url, self._SendRequestForJsonResponse(url)

  def GetRevertedRevision(self, message):
    """Parse message to get the reverted revision if there is one.

    Args:
      message (str): The full commit message; may be empty or None.

    Returns:
      The reverted revision, or None if the message does not start with
      'revert' (case-insensitive) or no reverted revision is found in it.
    """
    if not message:
      return None

    lines = message.strip().splitlines()
    # ``lines`` is empty for a whitespace-only message.
    if not lines or not lines[0].lower().startswith('revert'):
      return None

    for line in reversed(lines):  # pragma: no cover
      # TODO: Handle cases where no reverted_revision in reverting message.
      reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
      if reverted_revision_match:
        return reverted_revision_match.group(1)

    return None

  def _ParseChangeLogFromLogData(self, data):
    """Builds a ChangeLog from the json data of one commit.

    Args:
      data (dict): Json data of a commit. The keys read here are 'message',
          'tree_diff', 'author', 'committer' and 'commit'.

    Returns:
      A ChangeLog instance.

    Raises:
      Exception: A touched file has an unknown change type.
    """
    commit_position, code_review_url = (
        self.ExtractCommitPositionAndCodeReviewUrl(data['message']))

    touched_files = []
    for file_diff in data['tree_diff']:
      change_type = file_diff['type'].lower()
      if not diff.IsKnownChangeType(change_type):
        raise Exception('Unknown change type "%s"' % change_type)
      touched_files.append(
          FileChangeInfo(
              change_type, file_diff['old_path'], file_diff['new_path']))

    author_time = self._GetDateTimeFromString(data['author']['time'])
    committer_time = self._GetDateTimeFromString(data['committer']['time'])
    reverted_revision = self.GetRevertedRevision(data['message'])
    url = '%s/+/%s' % (self.repo_url, data['commit'])

    return ChangeLog(
        data['author']['name'], self._NormalizeEmail(data['author']['email']),
        author_time,
        data['committer']['name'],
        self._NormalizeEmail(data['committer']['email']),
        committer_time, data['commit'], commit_position,
        data['message'], touched_files, url, code_review_url,
        reverted_revision)

  def GetChangeLog(self, revision):
    """Returns the change log of the given revision.

    Returns None if no data could be retrieved for the revision.
    """
    _, data = self._DownloadChangeLogData(revision)
    if not data:
      return None

    return self._ParseChangeLogFromLogData(data)

  def GetCommitsBetweenRevisions(self, start_revision, end_revision, n=1000):
    """Gets a list of commit hashes between start_revision and end_revision.

    Args:
      start_revision: The oldest revision in the range.
      end_revision: The latest revision in the range.
      n: The maximum number of revisions to request at a time.

    Returns:
      A list of commit hashes made since start_revision through and including
      end_revision in order from most-recent to least-recent. This includes
      end_revision, but not start_revision. If a request fails, the commits
      collected so far are returned.
    """
    params = {'n': n}
    next_end_revision = end_revision
    commits = []

    # Each response may carry a 'next' revision from which to continue.
    while next_end_revision:
      url = '%s/+log/%s..%s' % (
          self.repo_url, start_revision, next_end_revision)
      data = self._SendRequestForJsonResponse(url, params)

      if not data:
        break

      for log in data.get('log', []):
        commit = log.get('commit')
        if commit:
          commits.append(commit)

      next_end_revision = data.get('next')

    return commits

  def GetChangeDiff(self, revision):
    """Returns the raw diff of the given revision."""
    # '%%5E%%21' renders as '%5E%21', i.e. the url-encoded form of '^!'.
    url = '%s/+/%s%%5E%%21/' % (self.repo_url, revision)
    return self._SendRequestForTextResponse(url)

  def GetBlame(self, path, revision):
    """Returns blame of the file at ``path`` of the given revision.

    Returns None if no blame data could be retrieved.
    """
    url = '%s/+blame/%s/%s' % (self.repo_url, revision, path)

    data = self._SendRequestForJsonResponse(url)
    if not data:
      return None

    blame = Blame(revision, path)
    for region in data['regions']:
      # Blame data uses a different time format than commit data.
      author_time = self._GetDateTimeFromString(
          region['author']['time'], '%Y-%m-%d %H:%M:%S')

      blame.AddRegion(
          Region(region['start'], region['count'], region['commit'],
                 region['author']['name'],
                 self._NormalizeEmail(region['author']['email']), author_time))

    return blame

  def GetSource(self, path, revision):
    """Returns source code of the file at ``path`` of the given revision."""
    url = '%s/+/%s/%s' % (self.repo_url, revision, path)
    return self._SendRequestForTextResponse(url)

  def GetChangeLogs(self, start_revision, end_revision, n=1000):
    """Gets a list of ChangeLogs in revision range by batch.

    Args:
      start_revision (str): The oldest revision in the range.
      end_revision (str): The latest revision in the range.
      n (int): The maximum number of revisions to request at a time (default
          to 1000).

    Returns:
      A list of changelogs in (start_revision, end_revision]. If a request
      fails, the changelogs collected so far are returned.
    """
    next_end_revision = end_revision
    changelogs = []

    while next_end_revision:
      url = '%s/+log/%s..%s' % (self.repo_url,
                                start_revision, next_end_revision)
      data = self._SendRequestForJsonResponse(url, params={'n': str(n),
                                                           'name-status': '1'})

      # A failed request yields None; stop instead of subscripting it, the
      # same way GetCommitsBetweenRevisions does.
      if not data:
        break

      for log in data.get('log', []):
        changelogs.append(self._ParseChangeLogFromLogData(log))

      # A missing 'next' means this was the last batch.
      next_end_revision = data.get('next')

    return changelogs
OLD | NEW |