Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(62)

Side by Side Diff: appengine/findit/common/git_repository.py

Issue 2344443005: [Findit] Factoring the gitiles (etc) stuff out into its own directory (Closed)
Patch Set: rebase-update Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import base64
6 from datetime import datetime
7 from datetime import timedelta
8 import json
9 import re
10
11 from common import diff
12 from common.blame import Blame
13 from common.blame import Region
14 from common.cache_decorator import Cached
15 from common.cache_decorator import CompressedMemCacher
16 from common.change_log import ChangeLog
17 from common.change_log import FileChangeInfo
18 from common.repository import Repository
19
20
# Matches the "Cr-Commit-Position" footer line of a commit message; group 1 is
# the numeric commit position on refs/heads/master.
COMMIT_POSITION_PATTERN = re.compile(
    r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)
# Matches a "Review URL:" / "Review-Url:" footer line; group 1 is the url up to
# and including its last run of digits (anything after that is dropped).
CODE_REVIEW_URL_PATTERN = re.compile(
    r'^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)
# Matches the quoted "> Committed: https://.../<40-hex-digit revision>" line of
# a commit message; group 1 is the 40-character revision.
REVERTED_REVISION_PATTERN = re.compile(
    r'^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)
# Matches a trailing UTC offset such as "-0700" or "+0130" in a time string.
TIMEZONE_PATTERN = re.compile(r'[-+]\d{4}$')
# Responses are cached for one day.
CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
29
30
class GitRepository(Repository):
  """Represents a git repository on https://chromium.googlesource.com.

  Data is fetched with ``http_client`` from urls built on top of ``repo_url``
  and parsed into ``ChangeLog`` and ``Blame`` objects. Responses are cached
  through the ``Cached`` decorator.
  """

  def __init__(self, repo_url, http_client):
    """Initializes the repository.

    Args:
      repo_url (str): Url of the repository. A single trailing '/' is dropped.
      http_client: Object with a ``Get(url, params)`` method that returns a
        ``(status_code, content)`` pair.
    """
    super(GitRepository, self).__init__()
    self.repo_url = repo_url
    if self.repo_url.endswith('/'):
      self.repo_url = self.repo_url[:-1]
    self.http_client = http_client

  @property
  def identifier(self):
    """The repo url (without the trailing slash) identifies this repository."""
    return self.repo_url

  @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
          cacher=CompressedMemCacher())
  def _SendRequestForJsonResponse(self, url, params=None):
    """Fetches ``url`` with ``format=json`` and returns the decoded json.

    Args:
      url (str): The url to request.
      params (dict): Extra query parameters. The dict is not modified.

    Returns:
      The decoded json data, or None if the status code is not 200.

    Raises:
      Exception: The response is empty or lacks the expected prefix.
    """
    # Work on a copy so that the dict passed in by the caller is never mutated.
    params = dict(params or {})
    params['format'] = 'json'

    # Gerrit prepends )]}' to json-formatted response.
    prefix = ')]}\'\n'

    status_code, content = self.http_client.Get(url, params)
    if status_code != 200:
      return None
    if not content or not content.startswith(prefix):
      raise Exception('Response does not begin with %s' % prefix)

    return json.loads(content[len(prefix):])

  @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
  def _SendRequestForTextResponse(self, url):
    """Fetches ``url`` with ``format=text`` and base64-decodes the content.

    Returns:
      The decoded content, or None if the status code is not 200.
    """
    status_code, content = self.http_client.Get(url, {'format': 'text'})
    if status_code != 200:
      return None
    return base64.b64decode(content)

  def ExtractCommitPositionAndCodeReviewUrl(self, message):
    """Returns the commit position and code review url in the commit message.

    A "commit position" is something similar to SVN version ids; i.e.,
    numeric identifiers which are issued in sequential order. The reason
    we care about them is that they're easier for humans to read than
    the hashes that Git uses internally for identifying commits. They do
    not have quite the same strong guarantees as SVN version ids (e.g.,
    it is possible for two different commits to end up with the same
    commit position under some circumstances). Thus, we should never
    actually use them for *identifying* commits; they're only for pretty
    printing to humans.

    Args:
      message (str): The commit message; may be empty or None.

    Returns:
      (commit_position, code_review_url), where commit_position is an int and
      either element is None if it is not found.
    """
    if not message:
      return (None, None)

    commit_position = None
    code_review_url = None

    # Commit position and code review url are in the last 5 lines. Scan from
    # the bottom up so that the match closest to the end of the message wins.
    for line in reversed(message.strip().split('\n')[-5:]):
      if commit_position is None:
        match = COMMIT_POSITION_PATTERN.match(line)
        if match:
          commit_position = int(match.group(1))

      if code_review_url is None:
        match = CODE_REVIEW_URL_PATTERN.match(line)
        if match:
          code_review_url = match.group(1)

    return (commit_position, code_review_url)

  def _NormalizeEmail(self, email):
    """Normalizes the email from git repo.

    Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
    Everything from the second '@' on is dropped.
    """
    parts = email.split('@')
    return '@'.join(parts[0:2])

  def _GetDateTimeFromString(self, datetime_string,
                             date_format='%a %b %d %H:%M:%S %Y'):
    """Parses a time string into a naive datetime.

    Args:
      datetime_string (str): The time, optionally followed by a space and a
        UTC offset like "-0700".
      date_format (str): strptime format of the part without the offset.

    Returns:
      A naive datetime. If an offset is present, it is applied so that the
      result is in UTC; otherwise the time is returned as parsed.
    """
    if not TIMEZONE_PATTERN.search(datetime_string):
      return datetime.strptime(datetime_string, date_format)

    # Need to handle timezone conversion.
    naive_datetime_str, _, offset_str = datetime_string.rpartition(' ')
    naive_datetime = datetime.strptime(naive_datetime_str, date_format)

    hour_offset = int(offset_str[-4:-2])
    minute_offset = int(offset_str[-2:])
    if offset_str[0] == '-':
      hour_offset = -hour_offset
      minute_offset = -minute_offset

    # Local time = UTC + offset, so subtract the offset to get UTC.
    return naive_datetime - timedelta(hours=hour_offset,
                                      minutes=minute_offset)

  def _DownloadChangeLogData(self, revision):
    """Returns (url, json data) of the commit page of ``revision``."""
    url = '%s/+/%s' % (self.repo_url, revision)
    return url, self._SendRequestForJsonResponse(url)

  def GetRevertedRevision(self, message):
    """Parse message to get the reverted revision if there is one.

    Args:
      message (str): The commit message; may be empty or None.

    Returns:
      The 40-character revision named by a "> Committed: ..." line if the
      first line of the message starts with "revert" (case-insensitive),
      otherwise None.
    """
    # An empty or whitespace-only message has no first line to inspect.
    lines = (message or '').strip().splitlines()
    if not lines or not lines[0].lower().startswith('revert'):
      return None

    for line in reversed(lines):  # pragma: no cover
      # TODO: Handle cases where no reverted_revision in reverting message.
      reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
      if reverted_revision_match:
        return reverted_revision_match.group(1)

    return None

  def _ParseChangeLogFromLogData(self, data):
    """Builds a ChangeLog from the json data of one commit.

    Args:
      data (dict): Must have the keys 'commit', 'message', 'tree_diff' (a list
        of dicts with 'type', 'old_path' and 'new_path'), and 'author' and
        'committer' (dicts with 'name', 'email' and 'time').

    Returns:
      A ChangeLog.

    Raises:
      Exception: A touched file has a change type unknown to ``diff``.
    """
    commit_position, code_review_url = (
        self.ExtractCommitPositionAndCodeReviewUrl(data['message']))

    touched_files = []
    for file_diff in data['tree_diff']:
      change_type = file_diff['type'].lower()
      if not diff.IsKnownChangeType(change_type):
        raise Exception('Unknown change type "%s"' % change_type)
      touched_files.append(
          FileChangeInfo(
              change_type, file_diff['old_path'], file_diff['new_path']))

    author_time = self._GetDateTimeFromString(data['author']['time'])
    committer_time = self._GetDateTimeFromString(data['committer']['time'])
    reverted_revision = self.GetRevertedRevision(data['message'])
    url = '%s/+/%s' % (self.repo_url, data['commit'])

    return ChangeLog(
        data['author']['name'], self._NormalizeEmail(data['author']['email']),
        author_time,
        data['committer']['name'],
        self._NormalizeEmail(data['committer']['email']),
        committer_time, data['commit'], commit_position,
        data['message'], touched_files, url, code_review_url,
        reverted_revision)

  def GetChangeLog(self, revision):
    """Returns the change log of the given revision.

    Returns None if no data could be downloaded for the revision.
    """
    _, data = self._DownloadChangeLogData(revision)
    if not data:
      return None

    return self._ParseChangeLogFromLogData(data)

  def GetCommitsBetweenRevisions(self, start_revision, end_revision, n=1000):
    """Gets a list of commit hashes between start_revision and end_revision.

    Args:
      start_revision: The oldest revision in the range.
      end_revision: The latest revision in the range.
      n: The maximum number of revisions to request at a time.

    Returns:
      A list of commit hashes made since start_revision through and including
      end_revision in order from most-recent to least-recent. This includes
      end_revision, but not start_revision. If a request yields no data, the
      commits collected so far are returned.
    """
    params = {'n': n}
    next_end_revision = end_revision
    commits = []

    while next_end_revision:
      url = '%s/+log/%s..%s' % (
          self.repo_url, start_revision, next_end_revision)
      data = self._SendRequestForJsonResponse(url, params)

      if not data:
        break

      for log in data.get('log', []):
        commit = log.get('commit')
        if commit:
          commits.append(commit)

      # 'next' is only present when there is another page of results.
      next_end_revision = data.get('next')

    return commits

  def GetChangeDiff(self, revision):
    """Returns the raw diff of the given revision.

    Returns None if the request does not succeed.
    """
    # %5E%21 is the url-encoded form of '^!'.
    url = '%s/+/%s%%5E%%21/' % (self.repo_url, revision)
    return self._SendRequestForTextResponse(url)

  def GetBlame(self, path, revision):
    """Returns blame of the file at ``path`` of the given revision.

    Returns:
      A Blame with one Region per entry of the 'regions' list in the
      response, or None if no data could be downloaded.
    """
    url = '%s/+blame/%s/%s' % (self.repo_url, revision, path)

    data = self._SendRequestForJsonResponse(url)
    if not data:
      return None

    blame = Blame(revision, path)
    for region in data['regions']:
      author = region['author']
      # Blame times use a different format from the one in commit logs.
      author_time = self._GetDateTimeFromString(
          author['time'], '%Y-%m-%d %H:%M:%S')

      blame.AddRegion(
          Region(region['start'], region['count'], region['commit'],
                 author['name'], self._NormalizeEmail(author['email']),
                 author_time))

    return blame

  def GetSource(self, path, revision):
    """Returns source code of the file at ``path`` of the given revision.

    Returns None if the request does not succeed.
    """
    url = '%s/+/%s/%s' % (self.repo_url, revision, path)
    return self._SendRequestForTextResponse(url)

  def GetChangeLogs(self, start_revision, end_revision, n=1000):
    """Gets a list of ChangeLogs in revision range by batch.

    Args:
      start_revision (str): The oldest revision in the range.
      end_revision (str): The latest revision in the range.
      n (int): The maximum number of revisions to request at a time (default
        to 1000).

    Returns:
      A list of changelogs in (start_revision, end_revision]. If a request
      yields no data, the changelogs collected so far are returned.
    """
    next_end_revision = end_revision
    changelogs = []

    while next_end_revision:
      url = '%s/+log/%s..%s' % (self.repo_url,
                                start_revision, next_end_revision)
      data = self._SendRequestForJsonResponse(
          url, params={'n': str(n), 'name-status': '1'})

      # A failed request returns None; stop paging like
      # GetCommitsBetweenRevisions does instead of failing on the lookup.
      if not data:
        break

      for log in data.get('log', []):
        changelogs.append(self._ParseChangeLogFromLogData(log))

      # 'next' is only present when there is another page of results.
      next_end_revision = data.get('next')

    return changelogs
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698