Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import difflib | 5 import difflib |
| 6 import hashlib | 6 import hashlib |
| 7 import itertools | |
| 8 import json | |
| 7 import os | 9 import os |
| 8 import re | |
| 9 import sys | 10 import sys |
| 11 import zipfile | |
| 10 | 12 |
| 11 | 13 |
| 12 # When set and a difference is detected, a diff of what changed is printed. | 14 # When set and a difference is detected, a diff of what changed is printed. |
| 13 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) | 15 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) |
| 14 | 16 |
| 15 # Used to strip off temp dir prefix. | |
| 16 _TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/') | |
| 17 | |
| 18 | 17 |
| 19 def CallAndRecordIfStale( | 18 def CallAndRecordIfStale( |
| 20 function, record_path=None, input_paths=None, input_strings=None, | 19 function, record_path=None, input_paths=None, input_strings=None, |
| 21 output_paths=None, force=False): | 20 output_paths=None, force=False, pass_changes=False): |
| 22 """Calls function if outputs are stale. | 21 """Calls function if outputs are stale. |
| 23 | 22 |
| 24 Outputs are considered stale if: | 23 Outputs are considered stale if: |
| 25 - any output_paths are missing, or | 24 - any output_paths are missing, or |
| 26 - the contents of any file within input_paths has changed, or | 25 - the contents of any file within input_paths has changed, or |
| 27 - the contents of input_strings has changed. | 26 - the contents of input_strings has changed. |
| 28 | 27 |
| 29 To debug which files are out-of-date, set the environment variable: | 28 To debug which files are out-of-date, set the environment variable: |
| 30 PRINT_MD5_DIFFS=1 | 29 PRINT_MD5_DIFFS=1 |
| 31 | 30 |
| 32 Args: | 31 Args: |
| 33 function: The function to call. | 32 function: The function to call. |
| 34 record_path: Path to record metadata. | 33 record_path: Path to record metadata. |
| 35 Defaults to output_paths[0] + '.md5.stamp' | 34 Defaults to output_paths[0] + '.md5.stamp' |
| 36 input_paths: List of paths to calculate an md5 sum on. | 35 input_paths: List of paths to calculate an md5 sum on. |
| 37 input_strings: List of strings to record verbatim. | 36 input_strings: List of strings to record verbatim. |
| 38 output_paths: List of output paths. | 37 output_paths: List of output paths. |
| 39 force: When True, function is always called. | 38 force: Whether to treat outputs as missing regardless of whether they |
| 39 actually are. | |
| 40 pass_changes: Whether to pass a Changes instance to |function|. | |
| 40 """ | 41 """ |
| 41 input_paths = input_paths or [] | 42 input_paths = input_paths or [] |
| 42 input_strings = input_strings or [] | 43 input_strings = input_strings or [] |
| 43 output_paths = output_paths or [] | 44 output_paths = output_paths or [] |
| 44 record_path = record_path or output_paths[0] + '.md5.stamp' | 45 record_path = record_path or output_paths[0] + '.md5.stamp' |
| 45 md5_checker = _Md5Checker( | 46 |
| 46 record_path=record_path, | 47 assert record_path.endswith('.stamp'), ( |
| 47 input_paths=input_paths, | 48 'record paths must end in \'.stamp\' so that they are easy to find ' |
| 48 input_strings=input_strings) | 49 'and delete') |
| 49 | 50 |
| 50 missing_outputs = [x for x in output_paths if not os.path.exists(x)] | 51 new_metadata = _Metadata() |
| 51 is_stale = md5_checker.old_digest != md5_checker.new_digest | 52 new_metadata.AddStrings(input_strings) |
| 52 | 53 |
| 53 if force or missing_outputs or is_stale: | 54 for path in input_paths: |
| 54 if _PRINT_MD5_DIFFS: | 55 if _IsZipFile(path): |
| 55 print '=' * 80 | 56 entries = _ExtractZipEntries(path) |
| 56 print 'Difference found in %s:' % record_path | 57 new_metadata.AddZipFile(path, entries) |
| 57 if missing_outputs: | 58 else: |
| 58 print 'Outputs do not exist:\n' + '\n'.join(missing_outputs) | 59 new_metadata.AddFile(path, _Md5ForPath(path)) |
| 59 elif force: | 60 |
| 60 print 'force=True' | 61 old_metadata = None |
| 61 else: | 62 missing_outputs = [x for x in output_paths if force or not os.path.exists(x)] |
| 62 print md5_checker.DescribeDifference() | 63 # When outputs are missing, don't bother gathering change information. |
| 63 print '=' * 80 | 64 if not missing_outputs and os.path.exists(record_path): |
| 64 function() | 65 with open(record_path, 'r') as jsonfile: |
| 65 md5_checker.Write() | 66 try: |
| 67 old_metadata = _Metadata.FromFile(jsonfile) | |
| 68 except: # pylint: disable=bare-except | |
| 69 pass # Not yet using new file format. | |
| 70 | |
| 71 changes = Changes(old_metadata, new_metadata, force, missing_outputs) | |
| 72 if not changes.HasChanges(): | |
| 73 return | |
| 74 | |
| 75 if _PRINT_MD5_DIFFS: | |
| 76 print '=' * 80 | |
| 77 print 'Target is stale: %s' % record_path | |
| 78 print changes.DescribeDifference() | |
| 79 print '=' * 80 | |
| 80 | |
| 81 # Delete the old metadata beforehand since failures leave it in an | |
| 82 # indeterminate state. | |
| 83 if old_metadata: | |
| 84 os.unlink(record_path) | |
| 85 | |
| 86 args = (changes,) if pass_changes else () | |
| 87 function(*args) | |
| 88 | |
| 89 with open(record_path, 'w') as f: | |
| 90 new_metadata.Write(f) | |
| 91 | |
| 92 | |
| 93 class Changes(object): | |
| 94 """Provides an API for querying what changed between runs.""" | |
| 95 | |
| 96 def __init__(self, old_metadata, new_metadata, force, missing_outputs): | |
| 97 self.old_metadata = old_metadata | |
| 98 self.new_metadata = new_metadata | |
| 99 self.force = force | |
| 100 self.missing_outputs = missing_outputs | |
| 101 | |
| 102 def _GetOldTag(self, path, subpath=None): | |
| 103 return self.old_metadata and self.old_metadata.GetTag(path, subpath) | |
| 104 | |
| 105 def HasChanges(self): | |
| 106 """Returns whether any changes exist.""" | |
| 107 return (self.force or | |
| 108 self.old_metadata is None or | |
|
jbudorick
2015/09/23 00:26:20
not self.old_metadata?
agrieve
2015/09/23 02:07:56
Done.
| |
| 109 self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or | |
| 110 self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5()) | |
| 111 | |
| 112 def AddedOrModifiedOnly(self): | |
| 113 """Returns whether the only changes were from added or modified (sub)files. | |
| 114 | |
| 115 No missing outputs, no removed paths/subpaths. | |
| 116 """ | |
| 117 if (self.force or | |
| 118 not self.old_metadata or | |
| 119 self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()): | |
| 120 return False | |
| 121 if any(self.IterRemovedPaths()): | |
| 122 return False | |
| 123 for path in self.IterModifiedPaths(): | |
| 124 if any(self.IterRemovedSubpaths(path)): | |
| 125 return False | |
| 126 return True | |
| 127 | |
| 128 def IterAddedPaths(self): | |
| 129 """Generator for paths that were added.""" | |
| 130 for path in self.new_metadata.IterPaths(): | |
| 131 if self._GetOldTag(path) is None: | |
| 132 yield path | |
| 133 | |
| 134 def IterAddedSubpaths(self, path): | |
| 135 """Generator for paths that were added within the given zip file.""" | |
| 136 for subpath in self.new_metadata.IterSubpaths(path): | |
| 137 if self._GetOldTag(path, subpath) is None: | |
| 138 yield subpath | |
| 139 | |
| 140 def IterRemovedPaths(self): | |
| 141 """Generator for paths that were removed.""" | |
| 142 if self.old_metadata: | |
| 143 for path in self.old_metadata.IterPaths(): | |
| 144 if self.new_metadata.GetTag(path) is None: | |
| 145 yield path | |
| 146 | |
| 147 def IterRemovedSubpaths(self, path): | |
| 148 """Generator for paths that were removed within the given zip file.""" | |
| 149 if self.old_metadata: | |
| 150 for subpath in self.old_metadata.IterSubpaths(path): | |
| 151 if self.new_metadata.GetTag(path, subpath) is None: | |
| 152 yield subpath | |
| 153 | |
| 154 def IterModifiedPaths(self): | |
| 155 """Generator for paths whose contents have changed.""" | |
| 156 for path in self.new_metadata.IterPaths(): | |
| 157 old_tag = self._GetOldTag(path) | |
| 158 new_tag = self.new_metadata.GetTag(path) | |
| 159 if old_tag is not None and old_tag != new_tag: | |
| 160 yield path | |
| 161 | |
| 162 def IterModifiedSubpaths(self, path): | |
| 163 """Generator for paths within a zip file whose contents have changed.""" | |
| 164 for subpath in self.new_metadata.IterSubpaths(path): | |
| 165 old_tag = self._GetOldTag(path, subpath) | |
| 166 new_tag = self.new_metadata.GetTag(path, subpath) | |
| 167 if old_tag is not None and old_tag != new_tag: | |
| 168 yield subpath | |
| 169 | |
| 170 def IterChangedPaths(self, path): | |
| 171 """Generator for all changed paths (added/removed/modified).""" | |
| 172 return itertools.chain(self.IterRemovedPaths(), | |
| 173 self.IterModifiedPaths(), | |
| 174 self.IterAddedPaths()) | |
| 175 | |
| 176 def IterChangedSubpaths(self, path): | |
| 177 """Generator for paths within a zip that were added/removed/modified.""" | |
| 178 return itertools.chain(self.IterRemovedSubpaths(path), | |
| 179 self.IterModifiedSubpaths(path), | |
| 180 self.IterAddedSubpaths(path)) | |
| 181 | |
| 182 def DescribeDifference(self): | |
| 183 """Returns a human-readable description of what changed.""" | |
| 184 if self.force: | |
| 185 return 'force=True' | |
| 186 elif self.missing_outputs: | |
| 187 return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs) | |
| 188 elif self.old_metadata is None: | |
| 189 return 'Previous stamp file not found.' | |
| 190 | |
| 191 if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5(): | |
| 192 ndiff = difflib.ndiff(self.old_metadata.GetStrings(), | |
| 193 self.new_metadata.GetStrings()) | |
| 194 changed = [s for s in ndiff if not s.startswith(' ')] | |
| 195 return 'Input strings changed:\n ' + '\n '.join(changed) | |
| 196 | |
| 197 if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5(): | |
| 198 return "There's no difference." | |
| 199 | |
| 200 lines = [] | |
| 201 lines.extend('Added: ' + p for p in self.IterAddedPaths()) | |
| 202 lines.extend('Removed: ' + p for p in self.IterRemovedPaths()) | |
| 203 for path in self.IterModifiedPaths(): | |
| 204 lines.append('Modified: ' + path) | |
| 205 lines.extend(' -> Subpath added: ' + p | |
| 206 for p in self.IterAddedSubpaths(path)) | |
| 207 lines.extend(' -> Subpath removed: ' + p | |
| 208 for p in self.IterRemovedSubpaths(path)) | |
| 209 lines.extend(' -> Subpath modified: ' + p | |
| 210 for p in self.IterModifiedSubpaths(path)) | |
| 211 if lines: | |
| 212 return 'Input files changed:\n ' + '\n '.join(lines) | |
| 213 return 'I have no idea what changed (there is a bug).' | |
|
jbudorick
2015/09/23 00:26:20
hahaha
| |
| 214 | |
| 215 | |
| 216 class _Metadata(object): | |
| 217 """Data model for tracking change metadata.""" | |
| 218 # Schema: | |
| 219 # { | |
| 220 # "files-md5": "VALUE", | |
| 221 # "strings-md5": "VALUE", | |
| 222 # "input-files": [ | |
| 223 # { | |
| 224 # "path": "path.jar", | |
| 225 # "tag": "{MD5 of entries}", | |
| 226 # "entries": [ | |
| 227 # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ... | |
| 228 # ] | |
| 229 # }, { | |
| 230 # "path": "path.txt", | |
| 231 # "tag": "{MD5}", | |
|
jbudorick
2015/09/23 00:26:20
nit: missing closing }
agrieve
2015/09/23 02:07:56
Done.
| |
| 232 # ], | |
| 233 # "input-strings": ["a", "b", ...], | |
| 234 # } | |
| 235 def __init__(self): | |
| 236 self._files_md5 = None | |
| 237 self._strings_md5 = None | |
| 238 self._files = [] | |
| 239 self._strings = [] | |
| 240 # Map of (path, subpath) -> entry. Created upon first call to _GetEntry(). | |
| 241 self._file_map = None | |
| 242 | |
| 243 @classmethod | |
| 244 def FromFile(cls, fileobj): | |
|
jbudorick
2015/09/23 00:26:20
not Read/Write or FromFile/ToFile? :(
agrieve
2015/09/23 02:07:56
Done.
| |
| 245 """Returns a _Metadata initialized from a file object.""" | |
| 246 ret = cls() | |
| 247 obj = json.load(fileobj) | |
| 248 ret._files_md5 = obj['files-md5'] | |
| 249 ret._strings_md5 = obj['strings-md5'] | |
| 250 ret._files = obj['input-files'] | |
| 251 ret._strings = obj['input-strings'] | |
| 252 return ret | |
| 253 | |
| 254 def Write(self, fileobj): | |
| 255 """Serializes metadata to the given file object.""" | |
| 256 obj = { | |
| 257 "files-md5": self.FilesMd5(), | |
| 258 "strings-md5": self.StringsMd5(), | |
| 259 "input-files": self._files, | |
| 260 "input-strings": self._strings, | |
| 261 } | |
| 262 json.dump(obj, fileobj, indent=2) | |
| 263 | |
| 264 def _AssertNotQueried(self): | |
| 265 assert self._files_md5 is None | |
| 266 assert self._strings_md5 is None | |
| 267 assert self._file_map is None | |
| 268 | |
| 269 def AddStrings(self, values): | |
| 270 self._AssertNotQueried() | |
| 271 self._strings.extend(str(v) for v in values) | |
| 272 | |
| 273 def AddFile(self, path, tag): | |
| 274 """Adds metadata for a non-zip file. | |
| 275 | |
| 276 Args: | |
| 277 path: Path to the file. | |
| 278 tag: A short string representative of the file contents. | |
| 279 """ | |
| 280 self._AssertNotQueried() | |
| 281 self._files.append({ | |
| 282 'path': path, | |
| 283 'tag': tag, | |
| 284 }) | |
| 285 | |
| 286 def AddZipFile(self, path, entries): | |
| 287 """Adds metadata for a zip file. | |
| 288 | |
| 289 Args: | |
| 290 path: Path to the file. | |
| 291 entries: List of (subpath, tag) tuples for entries within the zip. | |
| 292 """ | |
| 293 self._AssertNotQueried() | |
| 294 tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries), | |
| 295 (e[1] for e in entries))) | |
| 296 self._files.append({ | |
| 297 'path': path, | |
| 298 'tag': tag, | |
| 299 'entries': [{"path": e[0], "tag": e[1]} for e in entries], | |
| 300 }) | |
| 301 | |
| 302 def GetStrings(self): | |
| 303 """Returns the list of input strings.""" | |
| 304 return self._strings | |
| 305 | |
| 306 def FilesMd5(self): | |
| 307 """Lazily computes and returns the aggregate md5 of input files.""" | |
| 308 if self._files_md5 is None: | |
| 309 # Omit paths from md5 since temporary files have random names. | |
| 310 self._files_md5 = _ComputeInlineMd5( | |
| 311 self.GetTag(p) for p in sorted(self.IterPaths())) | |
| 312 return self._files_md5 | |
| 313 | |
| 314 def StringsMd5(self): | |
| 315 """Lazily computes and returns the aggregate md5 of input strings.""" | |
| 316 if self._strings_md5 is None: | |
| 317 self._strings_md5 = _ComputeInlineMd5(self._strings) | |
| 318 return self._strings_md5 | |
| 319 | |
| 320 def _GetEntry(self, path, subpath=None): | |
| 321 """Returns the JSON entry for the given path / subpath.""" | |
| 322 if self._file_map is None: | |
| 323 self._file_map = {} | |
| 324 for entry in self._files: | |
| 325 self._file_map[(entry['path'], None)] = entry | |
| 326 for subentry in entry.get('entries', ()): | |
| 327 self._file_map[(entry['path'], subentry['path'])] = subentry | |
| 328 return self._file_map.get((path, subpath)) | |
| 329 | |
| 330 def GetTag(self, path, subpath=None): | |
| 331 """Returns the tag for the given path / subpath.""" | |
| 332 ret = self._GetEntry(path, subpath) | |
| 333 return ret and ret['tag'] | |
| 334 | |
| 335 def IterPaths(self): | |
| 336 """Returns a generator for all top-level paths.""" | |
| 337 return (e['path'] for e in self._files) | |
| 338 | |
| 339 def IterSubpaths(self, path): | |
| 340 """Returns a generator for all subpaths in the given zip. | |
| 341 | |
| 342 If the given path is not a zip file, returns an empty generator. | |
| 343 """ | |
| 344 outer_entry = self._GetEntry(path) | |
| 345 subentries = outer_entry.get('entries', []) | |
| 346 return (entry['path'] for entry in subentries) | |
| 66 | 347 |
| 67 | 348 |
| 68 def _UpdateMd5ForFile(md5, path, block_size=2**16): | 349 def _UpdateMd5ForFile(md5, path, block_size=2**16): |
| 69 with open(path, 'rb') as infile: | 350 with open(path, 'rb') as infile: |
| 70 while True: | 351 while True: |
| 71 data = infile.read(block_size) | 352 data = infile.read(block_size) |
| 72 if not data: | 353 if not data: |
| 73 break | 354 break |
| 74 md5.update(data) | 355 md5.update(data) |
| 75 | 356 |
| 76 | 357 |
| 77 def _UpdateMd5ForDirectory(md5, dir_path): | 358 def _UpdateMd5ForDirectory(md5, dir_path): |
| 78 for root, _, files in os.walk(dir_path): | 359 for root, _, files in os.walk(dir_path): |
| 79 for f in files: | 360 for f in files: |
| 80 _UpdateMd5ForFile(md5, os.path.join(root, f)) | 361 _UpdateMd5ForFile(md5, os.path.join(root, f)) |
| 81 | 362 |
| 82 | 363 |
| 83 def _UpdateMd5ForPath(md5, path): | 364 def _Md5ForPath(path): |
| 365 md5 = hashlib.md5() | |
| 84 if os.path.isdir(path): | 366 if os.path.isdir(path): |
| 85 _UpdateMd5ForDirectory(md5, path) | 367 _UpdateMd5ForDirectory(md5, path) |
| 86 else: | 368 else: |
| 87 _UpdateMd5ForFile(md5, path) | 369 _UpdateMd5ForFile(md5, path) |
| 370 return md5.hexdigest() | |
| 88 | 371 |
| 89 | 372 |
| 90 def _TrimPathPrefix(path): | 373 def _ComputeInlineMd5(iterable): |
| 91 """Attempts to remove temp dir prefix from the path. | 374 """Computes the md5 of the concatenated parameters.""" |
| 92 | 375 md5 = hashlib.md5() |
| 93 Use this only for extended_info (not for the actual md5). | 376 for item in iterable: |
| 94 """ | 377 md5.update(str(item)) |
| 95 return _TEMP_DIR_PATTERN.sub('{TMP}', path) | 378 return md5.hexdigest() |
| 96 | 379 |
| 97 | 380 |
| 98 class _Md5Checker(object): | 381 def _IsZipFile(path): |
| 99 def __init__(self, record_path=None, input_paths=None, input_strings=None): | 382 """Returns whether to treat the given file as a zip file.""" |
| 100 if not input_paths: | 383 # ijar doesn't set the CRC32 field. |
| 101 input_paths = [] | 384 if path.endswith('.interface.jar'): |
| 102 if not input_strings: | 385 return False |
| 103 input_strings = [] | 386 return path.endswith('.zip') or path.endswith('.apk') or path.endswith('.jar') |
| 104 | 387 |
| 105 assert record_path.endswith('.stamp'), ( | |
| 106 'record paths must end in \'.stamp\' so that they are easy to find ' | |
| 107 'and delete') | |
| 108 | 388 |
| 109 self.record_path = record_path | 389 def _ExtractZipEntries(path): |
| 110 | 390 """Returns a list of (path, CRC32) of all files within |path|.""" |
| 111 extended_info = [] | 391 entries = [] |
| 112 outer_md5 = hashlib.md5() | 392 with zipfile.ZipFile(path) as zip_file: |
| 113 for i in sorted(input_paths): | 393 for zip_info in zip_file.infolist(): |
| 114 inner_md5 = hashlib.md5() | 394 # Skip directories and empty files. |
| 115 _UpdateMd5ForPath(inner_md5, i) | 395 if zip_info.CRC: |
| 116 i = _TrimPathPrefix(i) | 396 entries.append((zip_info.filename, zip_info.CRC)) |
| 117 extended_info.append(i + '=' + inner_md5.hexdigest()) | 397 return entries |
| 118 # Include the digest in the overall diff, but not the path | |
| 119 outer_md5.update(inner_md5.hexdigest()) | |
| 120 | |
| 121 for s in map(str, input_strings): | |
| 122 outer_md5.update(s) | |
| 123 extended_info.append(s) | |
| 124 | |
| 125 self.new_digest = outer_md5.hexdigest() | |
| 126 self.new_extended_info = extended_info | |
| 127 | |
| 128 self.old_digest = '' | |
| 129 self.old_extended_info = [] | |
| 130 if os.path.exists(self.record_path): | |
| 131 with open(self.record_path, 'r') as old_record: | |
| 132 self.old_extended_info = [line.strip() for line in old_record] | |
| 133 if self.old_extended_info: | |
| 134 self.old_digest = self.old_extended_info.pop(0) | |
| 135 | |
| 136 def Write(self): | |
| 137 with open(self.record_path, 'w') as new_record: | |
| 138 new_record.write(self.new_digest) | |
| 139 new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n') | |
| 140 | |
| 141 def DescribeDifference(self): | |
| 142 if self.old_digest == self.new_digest: | |
| 143 return "There's no difference." | |
| 144 if not self.old_digest: | |
| 145 return 'Previous stamp file not found.' | |
| 146 if not self.old_extended_info: | |
| 147 return 'Previous stamp file lacks extended info.' | |
| 148 diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info) | |
| 149 return '\n'.join(diff) | |
| OLD | NEW |