Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: appengine/findit/common/git_repository.py

Issue 2435863003: [Findit] Add local git parsers. (Closed)
Patch Set: Fix nits. Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright 2014 The Chromium Authors. All rights reserved. 1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import base64 5 import base64
6 from datetime import datetime 6 from datetime import datetime
7 from datetime import timedelta 7 from datetime import timedelta
8 import json 8 import json
9 import re 9 import re
10 10
11 from common import diff 11 from common import diff
12 from common import repo_util
12 from common.blame import Blame 13 from common.blame import Blame
13 from common.blame import Region 14 from common.blame import Region
14 from common.cache_decorator import Cached 15 from common.cache_decorator import Cached
15 from common.cache_decorator import CompressedMemCacher 16 from common.cache_decorator import CompressedMemCacher
16 from common.change_log import ChangeLog 17 from common.change_log import ChangeLog
17 from common.change_log import FileChangeInfo 18 from common.change_log import FileChangeInfo
18 from common.repository import Repository 19 from common.repository import Repository
19 20
20
21 COMMIT_POSITION_PATTERN = re.compile(
22 '^Cr-Commit-Position: refs/heads/master@{#(\d+)}$', re.IGNORECASE)
23 CODE_REVIEW_URL_PATTERN = re.compile(
24 '^(?:Review URL|Review-Url): (.*\d+).*$', re.IGNORECASE)
25 REVERTED_REVISION_PATTERN = re.compile(
26 '^> Committed: https://.+/([0-9a-fA-F]{40})$', re.IGNORECASE)
27 TIMEZONE_PATTERN = re.compile('[-+]\d{4}$') 21 TIMEZONE_PATTERN = re.compile('[-+]\d{4}$')
28 CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60 22 CACHE_EXPIRE_TIME_SECONDS = 24 * 60 * 60
29 23
30 24
31 class GitRepository(Repository): 25 class GitRepository(Repository):
32 """Represents a git repository on https://chromium.googlesource.com.""" 26 """Represents a git repository on https://chromium.googlesource.com."""
33 27
34 def __init__(self, repo_url=None, http_client=None): 28 def __init__(self, repo_url=None, http_client=None):
35 super(GitRepository, self).__init__() 29 super(GitRepository, self).__init__()
36 if repo_url and repo_url.endswith('/'): 30 if repo_url and repo_url.endswith('/'):
37 self._repo_url = repo_url[:-1] 31 self._repo_url = repo_url[:-1]
38 else: 32 else:
39 self._repo_url = repo_url 33 self._repo_url = repo_url
40 34
41 self._http_client = http_client 35 self._http_client = http_client
42 36
43 @property 37 @property
44 def repo_url(self): 38 def repo_url(self):
45 return self._repo_url 39 return self._repo_url
46 40
47 @repo_url.setter 41 @repo_url.setter
48 def repo_url(self, repo_url): 42 def repo_url(self, repo_url):
49 self._repo_url = repo_url 43 self._repo_url = repo_url
50 44
51 @property 45 @property
52 def http_client(self): 46 def http_client(self):
stgao 2016/10/25 23:01:33 If we can't set the http_client, why it should def...
Sharu Jiang 2016/10/26 06:13:38 This would involve some refactoring since the orde...
53 return self._http_client 47 return self._http_client
54 48
55 @property 49 @property
56 def identifier(self): 50 def identifier(self):
57 return self.repo_url 51 return self.repo_url
58 52
59 @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS, 53 @Cached(namespace='Gitiles-json-view', expire_time=CACHE_EXPIRE_TIME_SECONDS,
60 cacher=CompressedMemCacher()) 54 cacher=CompressedMemCacher())
61 def _SendRequestForJsonResponse(self, url, params=None): 55 def _SendRequestForJsonResponse(self, url, params=None):
62 if params is None: # pragma: no cover 56 if params is None: # pragma: no cover
(...skipping 11 matching lines...) Expand all
74 68
75 return json.loads(content[len(prefix):]) 69 return json.loads(content[len(prefix):])
76 70
77 @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS) 71 @Cached(namespace='Gitiles-text-view', expire_time=CACHE_EXPIRE_TIME_SECONDS)
78 def _SendRequestForTextResponse(self, url): 72 def _SendRequestForTextResponse(self, url):
79 status_code, content = self.http_client.Get(url, {'format': 'text'}) 73 status_code, content = self.http_client.Get(url, {'format': 'text'})
80 if status_code != 200: 74 if status_code != 200:
81 return None 75 return None
82 return base64.b64decode(content) 76 return base64.b64decode(content)
83 77
84 def ExtractCommitPositionAndCodeReviewUrl(self, message):
85 """Returns the commit position and code review url in the commit message.
86
87 A "commit position" is something similar to SVN version ids; i.e.,
88 numeric identifiers which are issued in sequential order. The reason
89 we care about them is that they're easier for humans to read than
90 the hashes that Git uses internally for identifying commits. We
91 should never actually use them for *identifying* commits; they're
92 only for pretty printing to humans.
93
94 Returns:
95 (commit_position, code_review_url)
96 """
97 if not message:
98 return (None, None)
99
100 commit_position = None
101 code_review_url = None
102
103 # Commit position and code review url are in the last 5 lines.
104 lines = message.strip().split('\n')[-5:]
105 lines.reverse()
106
107 for line in lines:
108 if commit_position is None:
109 match = COMMIT_POSITION_PATTERN.match(line)
110 if match:
111 commit_position = int(match.group(1))
112
113 if code_review_url is None:
114 match = CODE_REVIEW_URL_PATTERN.match(line)
115 if match:
116 code_review_url = match.group(1)
117 return (commit_position, code_review_url)
118
119 def _NormalizeEmail(self, email):
120 """Normalizes the email from git repo.
121
122 Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
123 """
124 parts = email.split('@')
125 return '@'.join(parts[0:2])
126
127 def _GetDateTimeFromString(self, datetime_string, 78 def _GetDateTimeFromString(self, datetime_string,
128 date_format='%a %b %d %H:%M:%S %Y'): 79 date_format='%a %b %d %H:%M:%S %Y'):
129 if TIMEZONE_PATTERN.findall(datetime_string): 80 if TIMEZONE_PATTERN.findall(datetime_string):
130 # Need to handle timezone conversion. 81 # Need to handle timezone conversion.
131 naive_datetime_str, _, offset_str = datetime_string.rpartition(' ') 82 naive_datetime_str, _, offset_str = datetime_string.rpartition(' ')
132 naive_datetime = datetime.strptime(naive_datetime_str, 83 naive_datetime = datetime.strptime(naive_datetime_str,
133 date_format) 84 date_format)
134 hour_offset = int(offset_str[-4:-2]) 85 hour_offset = int(offset_str[-4:-2])
135 minute_offset = int(offset_str[-2:]) 86 minute_offset = int(offset_str[-2:])
136 if(offset_str[0]) == '-': 87 if(offset_str[0]) == '-':
137 hour_offset = -hour_offset 88 hour_offset = -hour_offset
138 minute_offset = -minute_offset 89 minute_offset = -minute_offset
139 90
140 time_delta = timedelta(hours=hour_offset, minutes=minute_offset) 91 time_delta = timedelta(hours=hour_offset, minutes=minute_offset)
141 92
142 utc_datetime = naive_datetime - time_delta 93 utc_datetime = naive_datetime - time_delta
143 return utc_datetime 94 return utc_datetime
144 95
145 return datetime.strptime(datetime_string, date_format) 96 return datetime.strptime(datetime_string, date_format)
146 97
147 def _DownloadChangeLogData(self, revision): 98 def _DownloadChangeLogData(self, revision):
148 url = '%s/+/%s' % (self.repo_url, revision) 99 url = '%s/+/%s' % (self.repo_url, revision)
149 return url, self._SendRequestForJsonResponse(url) 100 return url, self._SendRequestForJsonResponse(url)
150 101
151 def GetRevertedRevision(self, message):
152 """Parse message to get the reverted revision if there is one."""
153 lines = message.strip().splitlines()
154 if not lines[0].lower().startswith('revert'):
155 return None
156
157 for line in reversed(lines): # pragma: no cover
158 # TODO: Handle cases where no reverted_revision in reverting message.
159 reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
160 if reverted_revision_match:
161 return reverted_revision_match.group(1)
162
163 def _ParseChangeLogFromLogData(self, data): 102 def _ParseChangeLogFromLogData(self, data):
164 commit_position, code_review_url = ( 103 commit_position, code_review_url = (
165 self.ExtractCommitPositionAndCodeReviewUrl(data['message'])) 104 repo_util.ExtractCommitPositionAndCodeReviewUrl(data['message']))
166 105
167 touched_files = [] 106 touched_files = []
168 for file_diff in data['tree_diff']: 107 for file_diff in data['tree_diff']:
169 change_type = file_diff['type'].lower() 108 change_type = file_diff['type'].lower()
170 if not diff.IsKnownChangeType(change_type): 109 if not diff.IsKnownChangeType(change_type):
171 raise Exception('Unknown change type "%s"' % change_type) 110 raise Exception('Unknown change type "%s"' % change_type)
172 touched_files.append( 111 touched_files.append(
173 FileChangeInfo( 112 FileChangeInfo(
174 change_type, file_diff['old_path'], file_diff['new_path'])) 113 change_type, file_diff['old_path'], file_diff['new_path']))
175 114
176 author_time = self._GetDateTimeFromString(data['author']['time']) 115 author_time = self._GetDateTimeFromString(data['author']['time'])
177 committer_time = self._GetDateTimeFromString(data['committer']['time']) 116 committer_time = self._GetDateTimeFromString(data['committer']['time'])
178 reverted_revision = self.GetRevertedRevision(data['message']) 117 reverted_revision = repo_util.GetRevertedRevision(data['message'])
179 url = '%s/+/%s' % (self.repo_url, data['commit']) 118 url = '%s/+/%s' % (self.repo_url, data['commit'])
180 119
181 return ChangeLog( 120 return ChangeLog(
182 data['author']['name'], self._NormalizeEmail(data['author']['email']), 121 data['author']['name'],
122 repo_util.NormalizeEmail(data['author']['email']),
183 author_time, 123 author_time,
184 data['committer']['name'], 124 data['committer']['name'],
185 self._NormalizeEmail(data['committer']['email']), 125 repo_util.NormalizeEmail(data['committer']['email']),
186 committer_time, data['commit'], commit_position, 126 committer_time, data['commit'], commit_position,
187 data['message'], touched_files, url, code_review_url, 127 data['message'], touched_files, url, code_review_url,
188 reverted_revision) 128 reverted_revision)
189 129
190 def GetChangeLog(self, revision): 130 def GetChangeLog(self, revision):
191 """Returns the change log of the given revision.""" 131 """Returns the change log of the given revision."""
192 _, data = self._DownloadChangeLogData(revision) 132 _, data = self._DownloadChangeLogData(revision)
193 if not data: 133 if not data:
194 return None 134 return None
195 135
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
243 return None 183 return None
244 184
245 blame = Blame(revision, path) 185 blame = Blame(revision, path)
246 for region in data['regions']: 186 for region in data['regions']:
247 author_time = self._GetDateTimeFromString( 187 author_time = self._GetDateTimeFromString(
248 region['author']['time'], '%Y-%m-%d %H:%M:%S') 188 region['author']['time'], '%Y-%m-%d %H:%M:%S')
249 189
250 blame.AddRegion( 190 blame.AddRegion(
251 Region(region['start'], region['count'], region['commit'], 191 Region(region['start'], region['count'], region['commit'],
252 region['author']['name'], 192 region['author']['name'],
253 self._NormalizeEmail(region['author']['email']), author_time)) 193 repo_util.NormalizeEmail(region['author']['email']),
194 author_time))
254 195
255 return blame 196 return blame
256 197
257 def GetSource(self, path, revision): 198 def GetSource(self, path, revision):
258 """Returns source code of the file at ``path`` of the given revision.""" 199 """Returns source code of the file at ``path`` of the given revision."""
259 url = '%s/+/%s/%s' % (self.repo_url, revision, path) 200 url = '%s/+/%s/%s' % (self.repo_url, revision, path)
260 return self._SendRequestForTextResponse(url) 201 return self._SendRequestForTextResponse(url)
261 202
262 def GetChangeLogs(self, start_revision, end_revision, n=1000): 203 def GetChangeLogs(self, start_revision, end_revision, n=1000):
263 """Gets a list of ChangeLogs in revision range by batch. 204 """Gets a list of ChangeLogs in revision range by batch.
(...skipping 18 matching lines...) Expand all
282 223
283 for log in data['log']: 224 for log in data['log']:
284 changelogs.append(self._ParseChangeLogFromLogData(log)) 225 changelogs.append(self._ParseChangeLogFromLogData(log))
285 226
286 if 'next' in data: 227 if 'next' in data:
287 next_end_revision = data['next'] 228 next_end_revision = data['next']
288 else: 229 else:
289 next_end_revision = None 230 next_end_revision = None
290 231
291 return changelogs 232 return changelogs
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698