| OLD | NEW |
| (Empty) |
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 import collections | |
| 5 import hashlib | |
| 6 import logging | |
| 7 import re | |
| 8 | |
| 9 from cStringIO import StringIO | |
| 10 | |
| 11 from infra.services.gnumbd.support.util import cached_property, freeze | |
| 12 | |
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
| 14 | |
| 15 ################################################################################ | |
| 16 # Exceptions | |
| 17 ################################################################################ | |
| 18 | |
class PartialCommit(Exception):
  """Signals that the raw text of a commit is incomplete.

  Attributes:
    raw - the raw commit content that was passed to the constructor.
  """

  def __init__(self, hsh, raw):
    """
    Args:
      hsh - identifier of the commit, interpolated into the message.
      raw - the offending raw content; kept on the instance as |raw|.
    """
    message = 'Commit %s has partial content: %r' % (hsh, raw)
    super(PartialCommit, self).__init__(message)
    self.raw = raw
| 24 | |
| 25 | |
class UnexpectedHeader(Exception):
  """Signals that a commit contains a header that should not be there."""

  def __init__(self, hsh, header, value):
    """
    Args:
      hsh - identifier of the commit, interpolated into the message.
      header - name of the offending header.
      value - value carried by the offending header.
    """
    message = 'Unexpected header in commit %s: %r -> %r' % (
        hsh, header, value)
    super(UnexpectedHeader, self).__init__(message)
| 30 | |
| 31 | |
| 32 ################################################################################ | |
| 33 # Base Class | |
| 34 ################################################################################ | |
| 35 | |
class Alterable(object):
  """Interface for value objects which are modified by copying.

  Every method here raises NotImplementedError; subclasses supply the real
  implementations.
  """

  def to_dict(self):  # pragma: no cover
    """Returns the shallow dictionary form of this object.

    Shallow means that the values of the dictionary may themselves be
    Alterable instances.
    """
    raise NotImplementedError()

  def alter(self, **kwargs):  # pragma: no cover
    """Returns a copy of self in which every field named in kwargs has been
    replaced by the given value."""
    raise NotImplementedError()

  @classmethod
  def from_raw(cls, data):  # pragma: no cover
    """Builds an instance of this class out of the string |data|."""
    raise NotImplementedError()
| 51 | |
| 52 | |
| 53 ################################################################################ | |
| 54 # Implementation | |
| 55 ################################################################################ | |
| 56 | |
class CommitTimestamp(Alterable):
  """An immutable timestamp: a seconds count plus a signed timezone offset.

  The textual form produced by str() and consumed by from_raw() is
  'SECS SHHMM', where S is the sign, e.g. '1400000000 -0700'.
  """

  def __init__(self, secs, sign, hours, mins):
    """
    Args:
      secs - the seconds count; must be an int.
      sign - sign of the timezone offset; exactly '+' or '-'.
      hours - hours of the timezone offset; 0 <= hours < 24.
      mins - minutes of the timezone offset; 0 <= mins < 60.
    """
    super(CommitTimestamp, self).__init__()
    assert isinstance(secs, int)
    # Test against a tuple, not the string '+-': `in` on a string is a
    # substring test, which would also let '' and '+-' through.
    assert sign in ('+', '-')
    assert 0 <= hours < 24
    assert 0 <= mins < 60

    self._secs = secs
    self._sign = sign
    self._hours = hours
    self._mins = mins

  # Comparison & Representation
  def __eq__(self, other):
    """Two timestamps are equal when all four of their fields are equal."""
    if self is other:
      return True
    if not isinstance(other, CommitTimestamp):
      return False
    return (
        self.secs == other.secs and
        self.sign == other.sign and
        self.hours == other.hours and
        self.mins == other.mins
    )

  def __ne__(self, other):
    return not (self == other)

  def __repr__(self):
    return 'CommitTimestamp(%r, %r, %r, %r)' % (
        self.secs, self.sign, self.hours, self.mins)

  def __str__(self):
    return '%s %s' % (self.secs, self.tz_str)

  # Accessors
  @property
  def hours(self):
    return self._hours

  @property
  def mins(self):
    return self._mins

  @property
  def secs(self):
    return self._secs

  @property
  def sign(self):
    return self._sign

  @property
  def tz_str(self):
    """The timezone offset rendered as sign + zero-padded HHMM."""
    return '%s%02d%02d' % (self.sign, self.hours, self.mins)

  # Methods
  def to_dict(self):
    """Returns the constructor arguments of this timestamp, keyed by name."""
    return {
        'secs': self.secs,
        'sign': self.sign,
        'hours': self.hours,
        'mins': self.mins,
    }

  def alter(self, **kwargs):
    """Returns a new CommitTimestamp with the fields in kwargs replaced.

    Only 'secs', 'sign', 'hours' and 'mins' are accepted as keys.
    """
    new_args = self.to_dict()
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitTimestamp(**new_args)

  @classmethod
  def from_raw(cls, data):
    """Parses the 'SECS SHHMM' form, e.g. '1400000000 -0700'.

    Raises ValueError if |data| does not consist of exactly two
    space-separated pieces, or if a numeric piece is not an integer.
    """
    # \d+ [+-]HHMM
    secs, tz = data.split(' ')
    return cls(int(secs), tz[0], int(tz[1:3]), int(tz[3:5]))
| 117 | |
| 118 | |
# A timestamp with a zero seconds count and a +0000 timezone offset.
NULL_TIMESTAMP = CommitTimestamp(secs=0, sign='+', hours=0, mins=0)
| 120 | |
| 121 | |
class CommitUser(Alterable):
  """An immutable (user, email, timestamp) triple.

  The textual form produced by str() and consumed by from_raw() is
  'USER <EMAIL> TIMESTAMP', where TIMESTAMP is the CommitTimestamp text.
  """

  def __init__(self, user, email, timestamp):
    """
    Args:
      user - a non-empty string.
      email - a non-empty string.
      timestamp - a CommitTimestamp.
    """
    super(CommitUser, self).__init__()
    assert isinstance(user, basestring) and user
    assert isinstance(email, basestring) and email
    assert isinstance(timestamp, CommitTimestamp)
    self._user = user
    self._email = email
    self._timestamp = timestamp

  # Comparison & Representation
  def __eq__(self, other):
    """Two users are equal when user, email and timestamp are all equal."""
    if self is other:
      return True
    if not isinstance(other, CommitUser):
      return False
    return (
        self.user == other.user and
        self.email == other.email and
        self.timestamp == other.timestamp
    )

  def __ne__(self, other):
    return not (self == other)

  def __repr__(self):
    return 'CommitUser(%r, %r, %r)' % (self.user, self.email, self.timestamp)

  def __str__(self):
    return '%s <%s> %s' % (self.user, self.email, self.timestamp)

  # Accessors
  @property
  def user(self):
    return self._user

  @property
  def email(self):
    return self._email

  @property
  def timestamp(self):
    return self._timestamp

  # Methods
  def to_dict(self):
    """Returns the constructor arguments of this user, keyed by name."""
    return {
        'user': self.user,
        'email': self.email,
        'timestamp': self.timestamp,
    }

  def alter(self, **kwargs):
    """Returns a new CommitUser with the fields in kwargs replaced.

    Only 'user', 'email' and 'timestamp' are accepted as keys.
    """
    fields = self.to_dict()
    assert set(fields).issuperset(kwargs.keys())
    fields.update(kwargs)
    return CommitUser(**fields)

  @classmethod
  def from_raw(cls, data):
    """Parses the 'USER <EMAIL> TIMESTAMP' form."""
    # safe_string() ' <' safe_string() '> ' [TIMESTAMP]
    user, remainder = data.split(' <', 1)
    email, raw_timestamp = remainder.split('> ', 1)
    return cls(user, email, CommitTimestamp.from_raw(raw_timestamp))
| 173 | |
| 174 | |
class CommitData(Alterable):
  """A workable data representation of a git commit object.

  Knows how to parse all the standard fields of a git commit object:
    * tree
    * parent(s)
    * author
    * committer
    * commit message

  Also knows how to parse 'footers' which are an informally-defined mechanism to
  append key-value pairs to the ends of commit messages.

  Footers are stored internally as a list of (key, value) pairs. This is in
  order to provide full round-trip compatibility for CommitData, since footers
  have no implied ordering, other than the ordering in the commit. Consider the
  footers:

    A: 1
    B: 2
    A: 3

  In order to represent this as something better than a list which maintains the
  round-trip invariant, we would need a (Frozen)OrderedMultiDict, which would be
  tricky to implement.

  Author and committer are treated as the format defined by CommitUser
  """
  FOOTER_RE = re.compile(r'([-a-zA-Z]+): (.*)')
  HASH_RE = re.compile(r'[0-9a-f]{40}')

  def __init__(self, tree, parents, author, committer, other_header_lines,
               message_lines, footer_lines):
    """
    Args:
      tree - hash of the tree object, as a string.
      parents - sequence of parent commit hashes, as strings.
      author - a CommitUser.
      committer - a CommitUser.
      other_header_lines - sequence of (header, value) str pairs for every
        header other than tree, parent, author and committer.
      message_lines - sequence of str, the lines of the commit message without
        the footers.
      footer_lines - sequence of (key, value) str pairs.
    """
    super(CommitData, self).__init__()
    assert all('\n' not in h and self.HASH_RE.match(h) for h in parents)
    assert '\n' not in tree and self.HASH_RE.match(tree)
    assert isinstance(author, CommitUser)
    assert isinstance(committer, CommitUser)
    assert all(isinstance(l, str) for l in message_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in other_header_lines)
    assert all(len(i) == 2 and all(isinstance(x, str) for x in i)
               for i in footer_lines)

    self._parents = freeze(parents)
    self._tree = tree
    self._author = author
    self._committer = committer
    self._other_header_lines = freeze(other_header_lines)
    self._message_lines = freeze(message_lines)
    self._footer_lines = freeze(footer_lines)

  # Comparison & Representation
  def __eq__(self, other):
    """Two CommitData are equal when their |hsh| values are equal."""
    return (self is other) or (
        isinstance(other, CommitData) and (
            self.hsh == other.hsh
        )
    )

  def __ne__(self, other):
    return not (self == other)

  def __repr__(self):
    return (
        'CommitData({tree!r}, {parents!r}, {author!r}, {committer!r}, '
        '{other_header_lines!r}, {message_lines!r}, {footer_lines!r})'
    ).format(**self.to_dict())

  def __str__(self):
    """Produces a string representation of this CommitData suitable for
    consumption by `git hash-object`.
    """
    lines = ['tree %s' % (self.tree,)]
    lines.extend('parent %s' % (parent,) for parent in self.parents)
    lines.append('author %s' % (self.author,))
    lines.append('committer %s' % (self.committer,))
    lines.extend(
        '%s %s' % (key, value) for key, value in self.other_header_lines)
    # A blank line separates the headers from the message.
    lines.append('')
    lines.append('\n'.join(self.message_lines))
    if self.footer_lines:
      # A blank line separates the message from the footers.
      lines.append('')
      lines.extend(
          '%s: %s' % (key, value) for key, value in self.footer_lines)
    # The trailing empty entry makes the joined text end with a newline.
    lines.append('')
    return '\n'.join(lines)

  # Accessors
  # pylint: disable=W0212
  author = property(lambda self: self._author)
  committer = property(lambda self: self._committer)
  footer_lines = property(lambda self: self._footer_lines)
  message_lines = property(lambda self: self._message_lines)
  other_header_lines = property(lambda self: self._other_header_lines)
  parents = property(lambda self: self._parents)
  tree = property(lambda self: self._tree)

  @cached_property
  def footers(self):
    """Frozen ordered mapping of footer key -> list of values, with keys in
    order of first appearance in |footer_lines|."""
    ret = collections.OrderedDict()
    for key, value in self.footer_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def other_headers(self):
    """Frozen ordered mapping of header -> list of values, with headers in
    order of first appearance in |other_header_lines|."""
    ret = collections.OrderedDict()
    for key, value in self.other_header_lines:
      ret.setdefault(key, []).append(value)
    return freeze(ret)

  @cached_property
  def hsh(self):
    """Hex sha1 digest of str(self)."""
    return hashlib.sha1(str(self)).hexdigest()

  # Methods
  def to_dict(self):
    """Returns the constructor arguments of this commit, keyed by name."""
    return {
        k: getattr(self, k)
        for k in ['parents', 'tree', 'author', 'committer',
                  'other_header_lines', 'message_lines', 'footer_lines']
    }

  def alter(self, **kwargs):
    """In addition to the normal fields on this class, you may also provide
    'footers' and 'other_headers' instead of 'footer_lines' and
    'other_header_lines' respectively.

    These are an OrderedDict, which will be merged into the existing *_lines
    as described by merge_lines.
    """
    new_args = self.to_dict()
    if 'footers' in kwargs:
      assert 'footer_lines' not in kwargs
      new_args['footer_lines'] = self.merge_lines(
          self.footer_lines, kwargs.pop('footers'))
    if 'other_headers' in kwargs:
      assert 'other_header_lines' not in kwargs
      new_args['other_header_lines'] = self.merge_lines(
          self.other_header_lines, kwargs.pop('other_headers'))
    assert set(new_args).issuperset(kwargs.keys())
    new_args.update(kwargs)
    return CommitData(**new_args)

  @staticmethod
  def merge_lines(old_lines, new_dict):
    """Produces new footer or other_header_lines given the old lines and the
    new dictionary.

    Preserves the order of |old_lines| as much as possible.

    Rules:
      * If a key is in new_dict, but the key is not in old_lines, the new
        lines are added at the end.
      * If a key is not in new_dict, its lines are passed through untouched,
        each one staying where it was relative to the other lines.
      * If a key maps to None (or any other false value, such as an empty
        list) in new_dict, lines with that key are removed.
      * If a key is present in both, all entries in new_dict for that key are
        inserted at the location of the first line in old_lines for that key
        (and any other lines in old_lines with that key are removed).

    Args:
      old_lines - a sequence of (key, value) pairs
      new_dict - an OrderedDict of {key: [values]} or {key: None}

    Returns a new list of (key, value) pairs.
    """
    # Split new_dict into the keys to replace and the keys to delete.
    replacements = collections.OrderedDict()
    del_keys = set()
    for key in new_dict:
      values = new_dict[key]
      if values:
        replacements[key] = values
      else:
        del_keys.add(key)

    new_lines = []
    placed = set()  # replacement keys already written into new_lines
    for key, value in old_lines:
      if key in del_keys:
        continue
      if key not in replacements:
        # Not mentioned in new_dict: keep the line exactly where it is.
        new_lines.append((key, value))
      elif key not in placed:
        # First old line for this key: all of the new values go here. Later
        # old lines with the same key fall through and are dropped.
        placed.add(key)
        new_lines.extend((key, v) for v in replacements[key])

    # Keys which never occurred in old_lines go at the end, in new_dict order.
    for key, values in replacements.items():
      if key not in placed:
        new_lines.extend((key, v) for v in values)

    return new_lines

  @classmethod
  def from_raw(cls, data):
    """Turns the raw output of `git cat-file commit` into a CommitData.

    Args:
      data - the raw commit text; must end with a newline.

    Raises:
      PartialCommit - if |data| does not end with a newline, or if it lacks a
        tree, author or committer header.
      UnexpectedHeader - if a tree, author or committer header is repeated.
      ValueError - if a header line contains no space.
    """
    hsh_ref = []
    def hsh_fn():
      # Computed lazily; it is only needed for error and warning messages.
      if not hsh_ref:
        hsh_ref.append(hashlib.sha1(data).hexdigest())
      return hsh_ref[0]

    if data[-1:] != '\n':
      raise PartialCommit(hsh_fn(), data)

    users = {}
    parents = []
    tree = None
    other_header_lines = []

    raw_lines = data.splitlines()
    header_count = 0
    for line in raw_lines:
      if not line:
        # The first empty line ends the headers.
        break
      # The value must not be bound to the name |data|: hsh_fn() and the
      # PartialCommit raised below both need the complete raw commit text.
      header, value = line.split(' ', 1)
      if header == 'parent':
        parents.append(value)
      elif header in ('author', 'committer'):
        if header in users:
          raise UnexpectedHeader(hsh_fn(), header, value)
        users[header] = CommitUser.from_raw(value)
      elif header == 'tree':
        if tree:
          raise UnexpectedHeader(hsh_fn(), header, value)
        tree = value
      else:
        LOGGER.warning('Unexpected header in git commit %r: %r -> %r',
                       hsh_fn(), header, value)
        other_header_lines.append((header, value))
      header_count += 1

    # Skip the empty separator line which follows the headers.
    raw_message = raw_lines[header_count + 1:]

    # footers are lines in the form:
    #   ...message...
    #   <empty line>
    #   foo: data
    #   bar: other data
    #   ...
    #
    # If no empty line is found, they're considered not to exist.
    # If one line in the footers doesn't match the 'key: value' format, none
    # of the footers are considered to exist.
    message_lines = raw_message
    footer_lines = []

    # Walk backwards from the end of the message, collecting footers until
    # the empty separator line is reached.
    for offset, line in enumerate(reversed(raw_message)):
      if not line:
        message_lines = raw_message[:-(offset + 1)]
        break

      match = cls.FOOTER_RE.match(line)
      if not match:
        if offset:
          # Footer-like lines were collected, but they turned out to be part
          # of the message itself.
          footer_lines = []
          LOGGER.warning('Malformed footers')
        break
      footer_lines.append((match.group(1), match.group(2)))
    else:
      LOGGER.warning('Footers comprise entire message')
      message_lines = []

    # Footers were collected bottom-up; restore the commit's own order.
    footer_lines.reverse()

    if not tree or 'author' not in users or 'committer' not in users:
      raise PartialCommit(hsh_fn(), data)

    return cls(tree, parents, users['author'], users['committer'],
               other_header_lines, message_lines, footer_lines)
| OLD | NEW |