Index: build/android/gyp/util/md5_check.py |
diff --git a/build/android/gyp/util/md5_check.py b/build/android/gyp/util/md5_check.py |
index f21e693800ee6fec72c8c9b29dde4e92611e0dba..699d3bfe143b278ef55897b4cae46e38e46071c5 100644 |
--- a/build/android/gyp/util/md5_check.py |
+++ b/build/android/gyp/util/md5_check.py |
@@ -4,21 +4,20 @@ |
import difflib |
import hashlib |
+import itertools |
+import json |
import os |
-import re |
import sys |
+import zipfile |
# When set and a difference is detected, a diff of what changed is printed. |
_PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) |
-# Used to strip off temp dir prefix. |
-_TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/') |
- |
def CallAndRecordIfStale( |
function, record_path=None, input_paths=None, input_strings=None, |
- output_paths=None, force=False): |
+ output_paths=None, force=False, pass_changes=False): |
"""Calls function if outputs are stale. |
Outputs are considered stale if: |
@@ -36,34 +35,317 @@ def CallAndRecordIfStale( |
input_paths: List of paths to calcualte an md5 sum on. |
input_strings: List of strings to record verbatim. |
output_paths: List of output paths. |
- force: When True, function is always called. |
+ force: Whether to treat outputs as missing regardless of whether they |
+ actually are. |
+ pass_changes: Whether to pass a Changes instance to |function|. |
""" |
assert record_path or output_paths |
input_paths = input_paths or [] |
input_strings = input_strings or [] |
output_paths = output_paths or [] |
record_path = record_path or output_paths[0] + '.md5.stamp' |
- md5_checker = _Md5Checker( |
- record_path=record_path, |
- input_paths=input_paths, |
- input_strings=input_strings) |
- |
- missing_outputs = [x for x in output_paths if not os.path.exists(x)] |
- is_stale = md5_checker.old_digest != md5_checker.new_digest |
- |
- if force or missing_outputs or is_stale: |
- if _PRINT_MD5_DIFFS: |
- print '=' * 80 |
- print 'Difference found in %s:' % record_path |
- if missing_outputs: |
- print 'Outputs do not exist:\n' + '\n'.join(missing_outputs) |
- elif force: |
- print 'force=True' |
- else: |
- print md5_checker.DescribeDifference() |
- print '=' * 80 |
- function() |
- md5_checker.Write() |
+ |
+ assert record_path.endswith('.stamp'), ( |
+ 'record paths must end in \'.stamp\' so that they are easy to find ' |
+ 'and delete') |
+ |
+ new_metadata = _Metadata() |
+ new_metadata.AddStrings(input_strings) |
+ |
+ for path in input_paths: |
+ if _IsZipFile(path): |
+ entries = _ExtractZipEntries(path) |
+ new_metadata.AddZipFile(path, entries) |
+ else: |
+ new_metadata.AddFile(path, _Md5ForPath(path)) |
+ |
+ old_metadata = None |
+ missing_outputs = [x for x in output_paths if force or not os.path.exists(x)] |
+ # When outputs are missing, don't bother gathering change information. |
+ if not missing_outputs and os.path.exists(record_path): |
+ with open(record_path, 'r') as jsonfile: |
+ try: |
+ old_metadata = _Metadata.FromFile(jsonfile) |
+ except: # pylint: disable=bare-except |
+ pass # Not yet using new file format. |
+ |
+ changes = Changes(old_metadata, new_metadata, force, missing_outputs) |
+ if not changes.HasChanges(): |
+ return |
+ |
+ if _PRINT_MD5_DIFFS: |
+ print '=' * 80 |
+ print 'Target is stale: %s' % record_path |
+ print changes.DescribeDifference() |
+ print '=' * 80 |
+ |
+  # Delete the old metadata beforehand since failures leave it in an |
+  # indeterminate state. |
+ if old_metadata: |
+ os.unlink(record_path) |
+ |
+ args = (changes,) if pass_changes else () |
+ function(*args) |
+ |
+ with open(record_path, 'w') as f: |
+ new_metadata.ToFile(f) |
+ |
+ |
+class Changes(object): |
+  """Provides an API for querying what changed between runs.""" |
+ |
+ def __init__(self, old_metadata, new_metadata, force, missing_outputs): |
+ self.old_metadata = old_metadata |
+ self.new_metadata = new_metadata |
+ self.force = force |
+ self.missing_outputs = missing_outputs |
+ |
+ def _GetOldTag(self, path, subpath=None): |
+ return self.old_metadata and self.old_metadata.GetTag(path, subpath) |
+ |
+ def HasChanges(self): |
+ """Returns whether any changes exist.""" |
+ return (self.force or |
+ not self.old_metadata or |
+ self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or |
+ self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5()) |
+ |
+ def AddedOrModifiedOnly(self): |
+ """Returns whether the only changes were from added or modified (sub)files. |
+ |
+ No missing outputs, no removed paths/subpaths. |
+ """ |
+ if (self.force or |
+ not self.old_metadata or |
+ self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()): |
+ return False |
+ if any(self.IterRemovedPaths()): |
+ return False |
+ for path in self.IterModifiedPaths(): |
+ if any(self.IterRemovedSubpaths(path)): |
+ return False |
+ return True |
+ |
+ def IterAddedPaths(self): |
+ """Generator for paths that were added.""" |
+ for path in self.new_metadata.IterPaths(): |
+ if self._GetOldTag(path) is None: |
+ yield path |
+ |
+ def IterAddedSubpaths(self, path): |
+ """Generator for paths that were added within the given zip file.""" |
+ for subpath in self.new_metadata.IterSubpaths(path): |
+ if self._GetOldTag(path, subpath) is None: |
+ yield subpath |
+ |
+ def IterRemovedPaths(self): |
+ """Generator for paths that were removed.""" |
+ if self.old_metadata: |
+ for path in self.old_metadata.IterPaths(): |
+ if self.new_metadata.GetTag(path) is None: |
+ yield path |
+ |
+ def IterRemovedSubpaths(self, path): |
+ """Generator for paths that were removed within the given zip file.""" |
+ if self.old_metadata: |
+ for subpath in self.old_metadata.IterSubpaths(path): |
+ if self.new_metadata.GetTag(path, subpath) is None: |
+ yield subpath |
+ |
+ def IterModifiedPaths(self): |
+ """Generator for paths whose contents have changed.""" |
+ for path in self.new_metadata.IterPaths(): |
+ old_tag = self._GetOldTag(path) |
+ new_tag = self.new_metadata.GetTag(path) |
+ if old_tag is not None and old_tag != new_tag: |
+ yield path |
+ |
+ def IterModifiedSubpaths(self, path): |
+ """Generator for paths within a zip file whose contents have changed.""" |
+ for subpath in self.new_metadata.IterSubpaths(path): |
+ old_tag = self._GetOldTag(path, subpath) |
+ new_tag = self.new_metadata.GetTag(path, subpath) |
+ if old_tag is not None and old_tag != new_tag: |
+ yield subpath |
+ |
+ def IterChangedPaths(self): |
+ """Generator for all changed paths (added/removed/modified).""" |
+ return itertools.chain(self.IterRemovedPaths(), |
+ self.IterModifiedPaths(), |
+ self.IterAddedPaths()) |
+ |
+ def IterChangedSubpaths(self, path): |
+ """Generator for paths within a zip that were added/removed/modified.""" |
+ return itertools.chain(self.IterRemovedSubpaths(path), |
+ self.IterModifiedSubpaths(path), |
+ self.IterAddedSubpaths(path)) |
+ |
+ def DescribeDifference(self): |
+ """Returns a human-readable description of what changed.""" |
+ if self.force: |
+ return 'force=True' |
+ elif self.missing_outputs: |
+ return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs) |
+ elif self.old_metadata is None: |
+ return 'Previous stamp file not found.' |
+ |
+ if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5(): |
+ ndiff = difflib.ndiff(self.old_metadata.GetStrings(), |
+ self.new_metadata.GetStrings()) |
+ changed = [s for s in ndiff if not s.startswith(' ')] |
+ return 'Input strings changed:\n ' + '\n '.join(changed) |
+ |
+ if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5(): |
+ return "There's no difference." |
+ |
+ lines = [] |
+ lines.extend('Added: ' + p for p in self.IterAddedPaths()) |
+ lines.extend('Removed: ' + p for p in self.IterRemovedPaths()) |
+ for path in self.IterModifiedPaths(): |
+ lines.append('Modified: ' + path) |
+ lines.extend(' -> Subpath added: ' + p |
+ for p in self.IterAddedSubpaths(path)) |
+ lines.extend(' -> Subpath removed: ' + p |
+ for p in self.IterRemovedSubpaths(path)) |
+ lines.extend(' -> Subpath modified: ' + p |
+ for p in self.IterModifiedSubpaths(path)) |
+ if lines: |
+ return 'Input files changed:\n ' + '\n '.join(lines) |
+ return 'I have no idea what changed (there is a bug).' |
+ |
+ |
+class _Metadata(object): |
+ """Data model for tracking change metadata.""" |
+ # Schema: |
+ # { |
+ # "files-md5": "VALUE", |
+ # "strings-md5": "VALUE", |
+ # "input-files": [ |
+ # { |
+ # "path": "path.jar", |
+ # "tag": "{MD5 of entries}", |
+ # "entries": [ |
+ # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ... |
+ # ] |
+ # }, { |
+ # "path": "path.txt", |
+ # "tag": "{MD5}", |
+ # } |
+ # ], |
+ # "input-strings": ["a", "b", ...], |
+ # } |
+ def __init__(self): |
+ self._files_md5 = None |
+ self._strings_md5 = None |
+ self._files = [] |
+ self._strings = [] |
+ # Map of (path, subpath) -> entry. Created upon first call to _GetEntry(). |
+ self._file_map = None |
+ |
+ @classmethod |
+ def FromFile(cls, fileobj): |
+ """Returns a _Metadata initialized from a file object.""" |
+ ret = cls() |
+ obj = json.load(fileobj) |
+ ret._files_md5 = obj['files-md5'] |
+ ret._strings_md5 = obj['strings-md5'] |
+ ret._files = obj['input-files'] |
+ ret._strings = obj['input-strings'] |
+ return ret |
+ |
+ def ToFile(self, fileobj): |
+ """Serializes metadata to the given file object.""" |
+ obj = { |
+ "files-md5": self.FilesMd5(), |
+ "strings-md5": self.StringsMd5(), |
+ "input-files": self._files, |
+ "input-strings": self._strings, |
+ } |
+ json.dump(obj, fileobj, indent=2) |
+ |
+ def _AssertNotQueried(self): |
+ assert self._files_md5 is None |
+ assert self._strings_md5 is None |
+ assert self._file_map is None |
+ |
+ def AddStrings(self, values): |
+ self._AssertNotQueried() |
+ self._strings.extend(str(v) for v in values) |
+ |
+ def AddFile(self, path, tag): |
+ """Adds metadata for a non-zip file. |
+ |
+ Args: |
+ path: Path to the file. |
+ tag: A short string representative of the file contents. |
+ """ |
+ self._AssertNotQueried() |
+ self._files.append({ |
+ 'path': path, |
+ 'tag': tag, |
+ }) |
+ |
+ def AddZipFile(self, path, entries): |
+ """Adds metadata for a zip file. |
+ |
+ Args: |
+ path: Path to the file. |
+ entries: List of (subpath, tag) tuples for entries within the zip. |
+ """ |
+ self._AssertNotQueried() |
+ tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries), |
+ (e[1] for e in entries))) |
+ self._files.append({ |
+ 'path': path, |
+ 'tag': tag, |
+ 'entries': [{"path": e[0], "tag": e[1]} for e in entries], |
+ }) |
+ |
+ def GetStrings(self): |
+ """Returns the list of input strings.""" |
+ return self._strings |
+ |
+ def FilesMd5(self): |
+ """Lazily computes and returns the aggregate md5 of input files.""" |
+ if self._files_md5 is None: |
+ # Omit paths from md5 since temporary files have random names. |
+ self._files_md5 = _ComputeInlineMd5( |
+ self.GetTag(p) for p in sorted(self.IterPaths())) |
+ return self._files_md5 |
+ |
+ def StringsMd5(self): |
+ """Lazily computes and returns the aggregate md5 of input strings.""" |
+ if self._strings_md5 is None: |
+ self._strings_md5 = _ComputeInlineMd5(self._strings) |
+ return self._strings_md5 |
+ |
+ def _GetEntry(self, path, subpath=None): |
+ """Returns the JSON entry for the given path / subpath.""" |
+ if self._file_map is None: |
+ self._file_map = {} |
+ for entry in self._files: |
+ self._file_map[(entry['path'], None)] = entry |
+ for subentry in entry.get('entries', ()): |
+ self._file_map[(entry['path'], subentry['path'])] = subentry |
+ return self._file_map.get((path, subpath)) |
+ |
+ def GetTag(self, path, subpath=None): |
+ """Returns the tag for the given path / subpath.""" |
+ ret = self._GetEntry(path, subpath) |
+ return ret and ret['tag'] |
+ |
+ def IterPaths(self): |
+ """Returns a generator for all top-level paths.""" |
+ return (e['path'] for e in self._files) |
+ |
+ def IterSubpaths(self, path): |
+ """Returns a generator for all subpaths in the given zip. |
+ |
+ If the given path is not a zip file, returns an empty generator. |
+ """ |
+ outer_entry = self._GetEntry(path) |
+ subentries = outer_entry.get('entries', []) |
+ return (entry['path'] for entry in subentries) |
def _UpdateMd5ForFile(md5, path, block_size=2**16): |
@@ -81,70 +363,37 @@ def _UpdateMd5ForDirectory(md5, dir_path): |
_UpdateMd5ForFile(md5, os.path.join(root, f)) |
-def _UpdateMd5ForPath(md5, path): |
+def _Md5ForPath(path): |
+ md5 = hashlib.md5() |
if os.path.isdir(path): |
_UpdateMd5ForDirectory(md5, path) |
else: |
_UpdateMd5ForFile(md5, path) |
+ return md5.hexdigest() |
-def _TrimPathPrefix(path): |
- """Attempts to remove temp dir prefix from the path. |
+def _ComputeInlineMd5(iterable): |
+ """Computes the md5 of the concatenated parameters.""" |
+ md5 = hashlib.md5() |
+ for item in iterable: |
+ md5.update(str(item)) |
+ return md5.hexdigest() |
- Use this only for extended_info (not for the actual md5). |
- """ |
- return _TEMP_DIR_PATTERN.sub('{TMP}', path) |
- |
- |
-class _Md5Checker(object): |
- def __init__(self, record_path=None, input_paths=None, input_strings=None): |
- if not input_paths: |
- input_paths = [] |
- if not input_strings: |
- input_strings = [] |
- |
- assert record_path.endswith('.stamp'), ( |
- 'record paths must end in \'.stamp\' so that they are easy to find ' |
- 'and delete') |
- |
- self.record_path = record_path |
- |
- extended_info = [] |
- outer_md5 = hashlib.md5() |
- for i in sorted(input_paths): |
- inner_md5 = hashlib.md5() |
- _UpdateMd5ForPath(inner_md5, i) |
- i = _TrimPathPrefix(i) |
- extended_info.append(i + '=' + inner_md5.hexdigest()) |
- # Include the digest in the overall diff, but not the path |
- outer_md5.update(inner_md5.hexdigest()) |
- |
- for s in (str(s) for s in input_strings): |
- outer_md5.update(s) |
- extended_info.append(s) |
- |
- self.new_digest = outer_md5.hexdigest() |
- self.new_extended_info = extended_info |
- |
- self.old_digest = '' |
- self.old_extended_info = [] |
- if os.path.exists(self.record_path): |
- with open(self.record_path, 'r') as old_record: |
- self.old_extended_info = [line.strip() for line in old_record] |
- if self.old_extended_info: |
- self.old_digest = self.old_extended_info.pop(0) |
- |
- def Write(self): |
- with open(self.record_path, 'w') as new_record: |
- new_record.write(self.new_digest) |
- new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n') |
- def DescribeDifference(self): |
- if self.old_digest == self.new_digest: |
- return "There's no difference." |
- if not self.old_digest: |
- return 'Previous stamp file not found.' |
- if not self.old_extended_info: |
- return 'Previous stamp file lacks extended info.' |
- diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info) |
- return '\n'.join(diff) |
+def _IsZipFile(path): |
+ """Returns whether to treat the given file as a zip file.""" |
+ # ijar doesn't set the CRC32 field. |
+ if path.endswith('.interface.jar'): |
+ return False |
+ return path[-4:] in ('.zip', '.apk', '.jar') or path.endswith('.srcjar') |
+ |
+ |
+def _ExtractZipEntries(path): |
+ """Returns a list of (path, CRC32) of all files within |path|.""" |
+ entries = [] |
+ with zipfile.ZipFile(path) as zip_file: |
+ for zip_info in zip_file.infolist(): |
+ # Skip directories and empty files. |
+ if zip_info.CRC: |
+ entries.append((zip_info.filename, zip_info.CRC)) |
+ return entries |