Chromium Code Reviews| Index: build/android/gyp/util/md5_check.py |
| diff --git a/build/android/gyp/util/md5_check.py b/build/android/gyp/util/md5_check.py |
| index 054caa491460de28f056f767dfd605d85fa54ad1..c738606872f9da187427cc8bc7a2eb76de431151 100644 |
| --- a/build/android/gyp/util/md5_check.py |
| +++ b/build/android/gyp/util/md5_check.py |
| @@ -4,21 +4,20 @@ |
| import difflib |
| import hashlib |
| +import itertools |
| +import json |
| import os |
| -import re |
| import sys |
| +import zipfile |
| # When set and a difference is detected, a diff of what changed is printed. |
| _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) |
| -# Used to strip off temp dir prefix. |
| -_TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/') |
| - |
| def CallAndRecordIfStale( |
| function, record_path=None, input_paths=None, input_strings=None, |
| - output_paths=None, force=False): |
| + output_paths=None, force=False, pass_changes=False): |
| """Calls function if outputs are stale. |
| Outputs are considered stale if: |
| @@ -36,33 +35,315 @@ def CallAndRecordIfStale( |
| input_paths: List of paths to calculate an md5 sum on. |
| input_strings: List of strings to record verbatim. |
| output_paths: List of output paths. |
| - force: When True, function is always called. |
| + force: Whether to treat outputs as missing regardless of whether they |
| + actually are. |
| + pass_changes: Whether to pass a Changes instance to |function|. |
| """ |
| input_paths = input_paths or [] |
| input_strings = input_strings or [] |
| output_paths = output_paths or [] |
| record_path = record_path or output_paths[0] + '.md5.stamp' |
| - md5_checker = _Md5Checker( |
| - record_path=record_path, |
| - input_paths=input_paths, |
| - input_strings=input_strings) |
| - |
| - missing_outputs = [x for x in output_paths if not os.path.exists(x)] |
| - is_stale = md5_checker.old_digest != md5_checker.new_digest |
| - |
| - if force or missing_outputs or is_stale: |
| - if _PRINT_MD5_DIFFS: |
| - print '=' * 80 |
| - print 'Difference found in %s:' % record_path |
| - if missing_outputs: |
| - print 'Outputs do not exist:\n' + '\n'.join(missing_outputs) |
| - elif force: |
| - print 'force=True' |
| - else: |
| - print md5_checker.DescribeDifference() |
| - print '=' * 80 |
| - function() |
| - md5_checker.Write() |
| + |
| + assert record_path.endswith('.stamp'), ( |
| + 'record paths must end in \'.stamp\' so that they are easy to find ' |
| + 'and delete') |
| + |
| + new_metadata = _Metadata() |
| + new_metadata.AddStrings(input_strings) |
| + |
| + for path in input_paths: |
| + if _IsZipFile(path): |
| + entries = _ExtractZipEntries(path) |
| + new_metadata.AddZipFile(path, entries) |
| + else: |
| + new_metadata.AddFile(path, _Md5ForPath(path)) |
| + |
| + old_metadata = None |
| + missing_outputs = [x for x in output_paths if force or not os.path.exists(x)] |
| + # When outputs are missing, don't bother gathering change information. |
| + if not missing_outputs and os.path.exists(record_path): |
| + with open(record_path, 'r') as jsonfile: |
| + try: |
| + old_metadata = _Metadata.FromFile(jsonfile) |
| + except: # pylint: disable=bare-except |
| + pass # Not yet using new file format. |
| + |
| + changes = Changes(old_metadata, new_metadata, force, missing_outputs) |
| + if not changes.HasChanges(): |
| + return |
| + |
| + if _PRINT_MD5_DIFFS: |
| + print '=' * 80 |
| + print 'Target is stale: %s' % record_path |
| + print changes.DescribeDifference() |
| + print '=' * 80 |
| + |
| + # Delete the old metadata beforehand since failures leave it in an |
| + # indeterminate state. |
| + if old_metadata: |
| + os.unlink(record_path) |
| + |
| + args = (changes,) if pass_changes else () |
| + function(*args) |
| + |
| + with open(record_path, 'w') as f: |
| + new_metadata.Write(f) |
| + |
| + |
| +class Changes(object): |
| + """Provides an API for querying what changed between runs.""" |
| + |
| + def __init__(self, old_metadata, new_metadata, force, missing_outputs): |
| + self.old_metadata = old_metadata |
| + self.new_metadata = new_metadata |
| + self.force = force |
| + self.missing_outputs = missing_outputs |
| + |
| + def _GetOldTag(self, path, subpath=None): |
| + return self.old_metadata and self.old_metadata.GetTag(path, subpath) |
| + |
| + def HasChanges(self): |
| + """Returns whether any changes exist.""" |
| + return (self.force or |
| + self.old_metadata is None or |
|
jbudorick
2015/09/23 00:26:20
not self.old_metadata?
agrieve
2015/09/23 02:07:56
Done.
|
| + self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or |
| + self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5()) |
| + |
| + def AddedOrModifiedOnly(self): |
| + """Returns whether the only changes were from added or modified (sub)files. |
| + |
| + No missing outputs, no removed paths/subpaths. |
| + """ |
| + if (self.force or |
| + not self.old_metadata or |
| + self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()): |
| + return False |
| + if any(self.IterRemovedPaths()): |
| + return False |
| + for path in self.IterModifiedPaths(): |
| + if any(self.IterRemovedSubpaths(path)): |
| + return False |
| + return True |
| + |
| + def IterAddedPaths(self): |
| + """Generator for paths that were added.""" |
| + for path in self.new_metadata.IterPaths(): |
| + if self._GetOldTag(path) is None: |
| + yield path |
| + |
| + def IterAddedSubpaths(self, path): |
| + """Generator for paths that were added within the given zip file.""" |
| + for subpath in self.new_metadata.IterSubpaths(path): |
| + if self._GetOldTag(path, subpath) is None: |
| + yield subpath |
| + |
| + def IterRemovedPaths(self): |
| + """Generator for paths that were removed.""" |
| + if self.old_metadata: |
| + for path in self.old_metadata.IterPaths(): |
| + if self.new_metadata.GetTag(path) is None: |
| + yield path |
| + |
| + def IterRemovedSubpaths(self, path): |
| + """Generator for paths that were removed within the given zip file.""" |
| + if self.old_metadata: |
| + for subpath in self.old_metadata.IterSubpaths(path): |
| + if self.new_metadata.GetTag(path, subpath) is None: |
| + yield subpath |
| + |
| + def IterModifiedPaths(self): |
| + """Generator for paths whose contents have changed.""" |
| + for path in self.new_metadata.IterPaths(): |
| + old_tag = self._GetOldTag(path) |
| + new_tag = self.new_metadata.GetTag(path) |
| + if old_tag is not None and old_tag != new_tag: |
| + yield path |
| + |
| + def IterModifiedSubpaths(self, path): |
| + """Generator for paths within a zip file whose contents have changed.""" |
| + for subpath in self.new_metadata.IterSubpaths(path): |
| + old_tag = self._GetOldTag(path, subpath) |
| + new_tag = self.new_metadata.GetTag(path, subpath) |
| + if old_tag is not None and old_tag != new_tag: |
| + yield subpath |
| + |
| + def IterChangedPaths(self): |
| + """Generator for all changed paths (added/removed/modified).""" |
| + return itertools.chain(self.IterRemovedPaths(), |
| + self.IterModifiedPaths(), |
| + self.IterAddedPaths()) |
| + |
| + def IterChangedSubpaths(self, path): |
| + """Generator for paths within a zip that were added/removed/modified.""" |
| + return itertools.chain(self.IterRemovedSubpaths(path), |
| + self.IterModifiedSubpaths(path), |
| + self.IterAddedSubpaths(path)) |
| + |
| + def DescribeDifference(self): |
| + """Returns a human-readable description of what changed.""" |
| + if self.force: |
| + return 'force=True' |
| + elif self.missing_outputs: |
| + return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs) |
| + elif self.old_metadata is None: |
| + return 'Previous stamp file not found.' |
| + |
| + if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5(): |
| + ndiff = difflib.ndiff(self.old_metadata.GetStrings(), |
| + self.new_metadata.GetStrings()) |
| + changed = [s for s in ndiff if not s.startswith(' ')] |
| + return 'Input strings changed:\n ' + '\n '.join(changed) |
| + |
| + if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5(): |
| + return "There's no difference." |
| + |
| + lines = [] |
| + lines.extend('Added: ' + p for p in self.IterAddedPaths()) |
| + lines.extend('Removed: ' + p for p in self.IterRemovedPaths()) |
| + for path in self.IterModifiedPaths(): |
| + lines.append('Modified: ' + path) |
| + lines.extend(' -> Subpath added: ' + p |
| + for p in self.IterAddedSubpaths(path)) |
| + lines.extend(' -> Subpath removed: ' + p |
| + for p in self.IterRemovedSubpaths(path)) |
| + lines.extend(' -> Subpath modified: ' + p |
| + for p in self.IterModifiedSubpaths(path)) |
| + if lines: |
| + return 'Input files changed:\n ' + '\n '.join(lines) |
| + return 'I have no idea what changed (there is a bug).' |
|
jbudorick
2015/09/23 00:26:20
hahaha
|
| + |
| + |
| +class _Metadata(object): |
| + """Data model for tracking change metadata.""" |
| + # Schema: |
| + # { |
| + # "files-md5": "VALUE", |
| + # "strings-md5": "VALUE", |
| + # "input-files": [ |
| + # { |
| + # "path": "path.jar", |
| + # "tag": "{MD5 of entries}", |
| + # "entries": [ |
| + # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ... |
| + # ] |
| + # }, { |
| + # "path": "path.txt", |
| + # "tag": "{MD5}", |
|
jbudorick
2015/09/23 00:26:20
nit: missing closing }
agrieve
2015/09/23 02:07:56
Done.
|
| + # ], |
| + # "input-strings": ["a", "b", ...], |
| + # } |
| + def __init__(self): |
| + self._files_md5 = None |
| + self._strings_md5 = None |
| + self._files = [] |
| + self._strings = [] |
| + # Map of (path, subpath) -> entry. Created upon first call to _GetEntry(). |
| + self._file_map = None |
| + |
| + @classmethod |
| + def FromFile(cls, fileobj): |
|
jbudorick
2015/09/23 00:26:20
not Read/Write or FromFile/ToFile? :(
agrieve
2015/09/23 02:07:56
Done.
|
| + """Returns a _Metadata initialized from a file object.""" |
| + ret = cls() |
| + obj = json.load(fileobj) |
| + ret._files_md5 = obj['files-md5'] |
| + ret._strings_md5 = obj['strings-md5'] |
| + ret._files = obj['input-files'] |
| + ret._strings = obj['input-strings'] |
| + return ret |
| + |
| + def Write(self, fileobj): |
| + """Serializes metadata to the given file object.""" |
| + obj = { |
| + "files-md5": self.FilesMd5(), |
| + "strings-md5": self.StringsMd5(), |
| + "input-files": self._files, |
| + "input-strings": self._strings, |
| + } |
| + json.dump(obj, fileobj, indent=2) |
| + |
| + def _AssertNotQueried(self): |
| + assert self._files_md5 is None |
| + assert self._strings_md5 is None |
| + assert self._file_map is None |
| + |
| + def AddStrings(self, values): |
| + self._AssertNotQueried() |
| + self._strings.extend(str(v) for v in values) |
| + |
| + def AddFile(self, path, tag): |
| + """Adds metadata for a non-zip file. |
| + |
| + Args: |
| + path: Path to the file. |
| + tag: A short string representative of the file contents. |
| + """ |
| + self._AssertNotQueried() |
| + self._files.append({ |
| + 'path': path, |
| + 'tag': tag, |
| + }) |
| + |
| + def AddZipFile(self, path, entries): |
| + """Adds metadata for a zip file. |
| + |
| + Args: |
| + path: Path to the file. |
| + entries: List of (subpath, tag) tuples for entries within the zip. |
| + """ |
| + self._AssertNotQueried() |
| + tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries), |
| + (e[1] for e in entries))) |
| + self._files.append({ |
| + 'path': path, |
| + 'tag': tag, |
| + 'entries': [{"path": e[0], "tag": e[1]} for e in entries], |
| + }) |
| + |
| + def GetStrings(self): |
| + """Returns the list of input strings.""" |
| + return self._strings |
| + |
| + def FilesMd5(self): |
| + """Lazily computes and returns the aggregate md5 of input files.""" |
| + if self._files_md5 is None: |
| + # Omit paths from md5 since temporary files have random names. |
| + self._files_md5 = _ComputeInlineMd5( |
| + self.GetTag(p) for p in sorted(self.IterPaths())) |
| + return self._files_md5 |
| + |
| + def StringsMd5(self): |
| + """Lazily computes and returns the aggregate md5 of input strings.""" |
| + if self._strings_md5 is None: |
| + self._strings_md5 = _ComputeInlineMd5(self._strings) |
| + return self._strings_md5 |
| + |
| + def _GetEntry(self, path, subpath=None): |
| + """Returns the JSON entry for the given path / subpath.""" |
| + if self._file_map is None: |
| + self._file_map = {} |
| + for entry in self._files: |
| + self._file_map[(entry['path'], None)] = entry |
| + for subentry in entry.get('entries', ()): |
| + self._file_map[(entry['path'], subentry['path'])] = subentry |
| + return self._file_map.get((path, subpath)) |
| + |
| + def GetTag(self, path, subpath=None): |
| + """Returns the tag for the given path / subpath.""" |
| + ret = self._GetEntry(path, subpath) |
| + return ret and ret['tag'] |
| + |
| + def IterPaths(self): |
| + """Returns a generator for all top-level paths.""" |
| + return (e['path'] for e in self._files) |
| + |
| + def IterSubpaths(self, path): |
| + """Returns a generator for all subpaths in the given zip. |
| + |
| + If the given path is not a zip file, returns an empty generator. |
| + """ |
| + outer_entry = self._GetEntry(path) |
| + subentries = outer_entry.get('entries', []) |
| + return (entry['path'] for entry in subentries) |
| def _UpdateMd5ForFile(md5, path, block_size=2**16): |
| @@ -80,70 +361,37 @@ def _UpdateMd5ForDirectory(md5, dir_path): |
| _UpdateMd5ForFile(md5, os.path.join(root, f)) |
| -def _UpdateMd5ForPath(md5, path): |
| +def _Md5ForPath(path): |
| + md5 = hashlib.md5() |
| if os.path.isdir(path): |
| _UpdateMd5ForDirectory(md5, path) |
| else: |
| _UpdateMd5ForFile(md5, path) |
| + return md5.hexdigest() |
| -def _TrimPathPrefix(path): |
| - """Attempts to remove temp dir prefix from the path. |
| +def _ComputeInlineMd5(iterable): |
| + """Computes the md5 of the concatenated parameters.""" |
| + md5 = hashlib.md5() |
| + for item in iterable: |
| + md5.update(str(item)) |
| + return md5.hexdigest() |
| - Use this only for extended_info (not for the actual md5). |
| - """ |
| - return _TEMP_DIR_PATTERN.sub('{TMP}', path) |
| - |
| - |
| -class _Md5Checker(object): |
| - def __init__(self, record_path=None, input_paths=None, input_strings=None): |
| - if not input_paths: |
| - input_paths = [] |
| - if not input_strings: |
| - input_strings = [] |
| - |
| - assert record_path.endswith('.stamp'), ( |
| - 'record paths must end in \'.stamp\' so that they are easy to find ' |
| - 'and delete') |
| - |
| - self.record_path = record_path |
| - |
| - extended_info = [] |
| - outer_md5 = hashlib.md5() |
| - for i in sorted(input_paths): |
| - inner_md5 = hashlib.md5() |
| - _UpdateMd5ForPath(inner_md5, i) |
| - i = _TrimPathPrefix(i) |
| - extended_info.append(i + '=' + inner_md5.hexdigest()) |
| - # Include the digest in the overall diff, but not the path |
| - outer_md5.update(inner_md5.hexdigest()) |
| - |
| - for s in map(str, input_strings): |
| - outer_md5.update(s) |
| - extended_info.append(s) |
| - |
| - self.new_digest = outer_md5.hexdigest() |
| - self.new_extended_info = extended_info |
| - |
| - self.old_digest = '' |
| - self.old_extended_info = [] |
| - if os.path.exists(self.record_path): |
| - with open(self.record_path, 'r') as old_record: |
| - self.old_extended_info = [line.strip() for line in old_record] |
| - if self.old_extended_info: |
| - self.old_digest = self.old_extended_info.pop(0) |
| - |
| - def Write(self): |
| - with open(self.record_path, 'w') as new_record: |
| - new_record.write(self.new_digest) |
| - new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n') |
| - def DescribeDifference(self): |
| - if self.old_digest == self.new_digest: |
| - return "There's no difference." |
| - if not self.old_digest: |
| - return 'Previous stamp file not found.' |
| - if not self.old_extended_info: |
| - return 'Previous stamp file lacks extended info.' |
| - diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info) |
| - return '\n'.join(diff) |
| +def _IsZipFile(path): |
| + """Returns whether to treat the given file as a zip file.""" |
| + # ijar doesn't set the CRC32 field. |
| + if path.endswith('.interface.jar'): |
| + return False |
| + return path.endswith('.zip') or path.endswith('.apk') or path.endswith('.jar') |
| + |
| + |
| +def _ExtractZipEntries(path): |
| + """Returns a list of (path, CRC32) of all files within |path|.""" |
| + entries = [] |
| + with zipfile.ZipFile(path) as zip_file: |
| + for zip_info in zip_file.infolist(): |
| + # Skip directories and empty files. |
| + if zip_info.CRC: |
| + entries.append((zip_info.filename, zip_info.CRC)) |
| + return entries |