Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1322)

Side by Side Diff: appengine/findit/common/git_repository.py

Issue 2344443005: [Findit] Factoring the gitiles (etc) stuff out into its own directory (Closed)
Patch Set: rebase-update Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import base64
6 from datetime import datetime
7 from datetime import timedelta
8 import json
9 import re
10
11 from common import diff
12 from common.blame import Blame
13 from common.blame import Region
14 from common.cache_decorator import Cached
15 from common.cache_decorator import CompressedMemCacher
16 from common.change_log import ChangeLog
17 from common.change_log import FileChangeInfo
18 from common.repository import Repository
19
20
# Regex patterns are raw strings so that ``\d`` reaches the regex engine
# verbatim instead of relying on Python passing unknown escapes through.

# Matches a "Cr-Commit-Position" line for refs/heads/master and captures the
# numeric commit position.
COMMIT_POSITION_PATTERN = re.compile(
    r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)
# Matches a "Review URL:" / "Review-Url:" line and captures the url up to its
# last digit.
CODE_REVIEW_URL_PATTERN = re.compile(
    r'^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)
# Matches a quoted "> Committed: https://.../<hash>" line and captures the
# 40-character hexadecimal hash.
REVERTED_REVISION_PATTERN = re.compile(
    r'^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)
# Matches a trailing timezone offset such as "-0700" or "+0530".
TIMEZONE_PATTERN = re.compile(r'[-+]\d{4}$')
# One day, expressed in seconds.
CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
29
30
31 class GitRepository(Repository):
32 """Represents a git repository on https://chromium.googlesource.com."""
33
def __init__(self, repo_url, http_client):
    """Stores the repository url and the http client used for requests.

    Args:
      repo_url: Url of the repository; a single trailing '/' is dropped.
      http_client: Client object whose ``Get`` method is used by the request
          helpers of this class.
    """
    super(GitRepository, self).__init__()
    # Only one trailing slash is removed, so url templates can add their own.
    self.repo_url = repo_url[:-1] if repo_url.endswith('/') else repo_url
    self.http_client = http_client
40
@property
def identifier(self):
    """Returns the identifier of this repository, which is its url."""
    return self.repo_url
44
@Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
        cacher=CompressedMemCacher())
def _SendRequestForJsonResponse(self, url, params=None):
    """Requests ``url`` with ``format=json`` and returns the decoded payload.

    Args:
      url: The url to request.
      params: Optional dict of query parameters. It is copied before the
          ``format`` entry is added, so the caller's dict is left untouched.

    Returns:
      The object decoded from the json body, or None if the response status
      code is not 200.

    Raises:
      Exception: If a 200 response is empty or does not start with the
          expected prefix.
    """
    # Work on a copy: paging callers reuse one dict for several requests and
    # should not see keys injected here.
    query = dict(params) if params else {}
    query['format'] = 'json'

    # Gerrit prepends )]}' to json-formatted response.
    prefix = ')]}\'\n'

    status_code, content = self.http_client.Get(url, query)
    if status_code != 200:
        return None
    if not content or not content.startswith(prefix):
        raise Exception('Response does not begin with %s' % prefix)

    return json.loads(content[len(prefix):])
62
@Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
def _SendRequestForTextResponse(self, url):
    """Requests ``url`` with ``format=text`` and returns the decoded body.

    Args:
      url: The url to request.

    Returns:
      The base64-decoded response content, or None if the response status
      code is not 200.
    """
    status_code, encoded_body = self.http_client.Get(url, {'format': 'text'})
    if status_code == 200:
        # The body is base64-decoded before being handed back.
        return base64.b64decode(encoded_body)
    return None
69
def ExtractCommitPositionAndCodeReviewUrl(self, message):
    """Extracts the commit position and code review url from a commit message.

    A "commit position" is a numeric identifier issued in sequential order,
    similar to an SVN version id. It is easier for humans to read than a git
    hash, but it should only be used for display and never for *identifying*
    a commit.

    Only the last 5 lines of the stripped message are inspected, from the
    bottom up, and the first match found for each value wins.

    Args:
      message: The commit message; may be None or empty.

    Returns:
      (commit_position, code_review_url), where each item is None if it was
      not found.
    """
    position, review_url = None, None
    if message:
        tail = message.strip().split('\n')[-5:]
        for text in reversed(tail):
            if position is None:
                found = COMMIT_POSITION_PATTERN.match(text)
                if found:
                    position = int(found.group(1))

            if review_url is None:
                found = CODE_REVIEW_URL_PATTERN.match(text)
                if found:
                    review_url = found.group(1)
    return (position, review_url)
104
105 def _NormalizeEmail(self, email):
106 """Normalizes the email from git repo.
107
108 Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
109 """
110 parts = email.split('@')
111 return '@'.join(parts[0:2])
112
def _GetDateTimeFromString(self, datetime_string,
                           date_format='%a %b %d %H:%M:%S %Y'):
    """Parses ``datetime_string`` into a naive datetime.

    If the string ends with a timezone offset such as "-0700", the offset is
    subtracted from the parsed time, so the returned value is in UTC.
    Otherwise the string is parsed as-is.

    Args:
      datetime_string: The text to parse, optionally followed by a space and
          a "+HHMM" / "-HHMM" offset.
      date_format: The ``strptime`` format of the part before the offset.

    Returns:
      A naive ``datetime`` object.
    """
    if not TIMEZONE_PATTERN.findall(datetime_string):
        return datetime.strptime(datetime_string, date_format)

    # Split the trailing offset from the local time it qualifies.
    local_part, _, offset = datetime_string.rpartition(' ')
    local_time = datetime.strptime(local_part, date_format)

    sign = -1 if offset[0] == '-' else 1
    shift = timedelta(hours=sign * int(offset[-4:-2]),
                      minutes=sign * int(offset[-2:]))

    # local = utc + offset, hence utc = local - offset.
    return local_time - shift
132
133 def _DownloadChangeLogData(self, revision):
134 url = '%s/+/%s' % (self.repo_url, revision)
135 return url, self._SendRequestForJsonResponse(url)
136
def GetRevertedRevision(self, message):
    """Parses ``message`` to get the reverted revision if there is one.

    Args:
      message: The commit message; may be None, empty or blank.

    Returns:
      The hash captured from the bottom-most "> Committed: ..." line of a
      message whose first line starts with "revert" (in any letter case).
      None if the message is None or blank, is not a revert, or has no such
      line.
    """
    if not message:
        return None

    lines = message.strip().splitlines()
    # A whitespace-only message strips to '' and yields no lines at all.
    if not lines or not lines[0].lower().startswith('revert'):
        return None

    for line in reversed(lines):
        # TODO: Handle cases where no reverted_revision in reverting message.
        reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
        if reverted_revision_match:
            return reverted_revision_match.group(1)
    return None
148
def _ParseChangeLogFromLogData(self, data):
    """Builds a ChangeLog from one commit entry of a json response.

    Args:
      data: A dict with the keys 'message', 'tree_diff', 'author',
          'committer' and 'commit'. Each 'tree_diff' item provides 'type',
          'old_path' and 'new_path'; 'author' and 'committer' provide 'name',
          'email' and 'time'.

    Returns:
      A ChangeLog object for the commit.

    Raises:
      Exception: If a 'tree_diff' item has a change type that
          ``diff.IsKnownChangeType`` rejects.
    """
    message = data['message']
    commit_position, code_review_url = (
        self.ExtractCommitPositionAndCodeReviewUrl(message))

    touched_files = []
    for entry in data['tree_diff']:
        kind = entry['type'].lower()
        if not diff.IsKnownChangeType(kind):
            raise Exception('Unknown change type "%s"' % kind)
        touched_files.append(
            FileChangeInfo(kind, entry['old_path'], entry['new_path']))

    author = data['author']
    author_time = self._GetDateTimeFromString(author['time'])
    committer = data['committer']
    committer_time = self._GetDateTimeFromString(committer['time'])
    reverted_revision = self.GetRevertedRevision(message)
    revision = data['commit']
    url = '%s/+/%s' % (self.repo_url, revision)

    return ChangeLog(
        author['name'], self._NormalizeEmail(author['email']),
        author_time,
        committer['name'], self._NormalizeEmail(committer['email']),
        committer_time, revision, commit_position,
        message, touched_files, url, code_review_url,
        reverted_revision)
175
def GetChangeLog(self, revision):
    """Returns the change log of the given revision.

    Args:
      revision: The revision to look up.

    Returns:
      The parsed change log, or None if no data could be downloaded for the
      revision.
    """
    _url, log_data = self._DownloadChangeLogData(revision)
    return self._ParseChangeLogFromLogData(log_data) if log_data else None
183
def GetCommitsBetweenRevisions(self, start_revision, end_revision, n=1000):
    """Gets a list of commit hashes between start_revision and end_revision.

    The log is fetched page by page: each response may name a 'next'
    revision, which becomes the upper bound of the following request. Paging
    stops when there is no 'next' or a request yields no data.

    Args:
      start_revision: The oldest revision in the range.
      end_revision: The latest revision in the range.
      n: The maximum number of revisions to request at a time.

    Returns:
      A list of commit hashes made since start_revision through and including
      end_revision in order from most-recent to least-recent. This includes
      end_revision, but not start_revision.
    """
    hashes = []
    query = {'n': n}
    cursor = end_revision

    while cursor:
        url = '%s/+log/%s..%s' % (self.repo_url, start_revision, cursor)
        page = self._SendRequestForJsonResponse(url, query)
        if not page:
            break

        # Entries without a usable 'commit' value are skipped.
        candidates = [entry.get('commit') for entry in page.get('log', [])]
        hashes.extend(commit for commit in candidates if commit)

        cursor = page.get('next')

    return hashes
217
def GetChangeDiff(self, revision):
    """Returns the raw diff of the given revision.

    Args:
      revision: The revision whose diff is requested.

    Returns:
      Whatever the text request helper returns for the diff url.
    """
    # %5E%21 is the url-encoded form of "^!".
    diff_url = '%s/+/%s%%5E%%21/' % (self.repo_url, revision)
    raw_diff = self._SendRequestForTextResponse(diff_url)
    return raw_diff
222
def GetBlame(self, path, revision):
    """Returns blame of the file at ``path`` of the given revision.

    Args:
      path: Path of the file within the repository.
      revision: The revision at which the file is blamed.

    Returns:
      A Blame object with one Region per item of the 'regions' list in the
      response, or None if the request yields no data.
    """
    url = '%s/+blame/%s/%s' % (self.repo_url, revision, path)

    data = self._SendRequestForJsonResponse(url)
    if not data:
        return None

    blame = Blame(revision, path)
    for region in data['regions']:
        author = region['author']
        # Blame timestamps use a different layout than the default format.
        authored_at = self._GetDateTimeFromString(
            author['time'], '%Y-%m-%d %H:%M:%S')
        email = self._NormalizeEmail(author['email'])

        blame.AddRegion(
            Region(region['start'], region['count'], region['commit'],
                   author['name'], email, authored_at))

    return blame
242
def GetSource(self, path, revision):
    """Returns source code of the file at ``path`` of the given revision.

    Args:
      path: Path of the file within the repository.
      revision: The revision at which the file is read.

    Returns:
      Whatever the text request helper returns for the file url.
    """
    file_url = '%s/+/%s/%s' % (self.repo_url, revision, path)
    source = self._SendRequestForTextResponse(file_url)
    return source
247
def GetChangeLogs(self, start_revision, end_revision, n=1000):
    """Gets a list of ChangeLogs in revision range by batch.

    The log is fetched page by page: each response may name a 'next'
    revision, which becomes the upper bound of the following request. Paging
    stops when there is no 'next' or when a request yields no data, in which
    case the changelogs collected so far are returned.

    Args:
      start_revision (str): The oldest revision in the range.
      end_revision (str): The latest revision in the range.
      n (int): The maximum number of revisions to request at a time (default
        to 1000).

    Returns:
      A list of changelogs in (start_revision, end_revision].
    """
    next_end_revision = end_revision
    changelogs = []

    while next_end_revision:
        url = '%s/+log/%s..%s' % (self.repo_url,
                                  start_revision, next_end_revision)
        # A fresh dict per request, so nothing is shared between pages.
        data = self._SendRequestForJsonResponse(url, params={'n': str(n),
                                                             'name-status': '1'})

        # The request helper returns None for a non-200 response; stop paging
        # instead of failing on the subscript below.
        if not data:
            break

        for log in data.get('log', []):
            changelogs.append(self._ParseChangeLogFromLogData(log))

        next_end_revision = data.get('next')

    return changelogs
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698