| Index: build/android/gyp/util/md5_check.py
|
| diff --git a/build/android/gyp/util/md5_check.py b/build/android/gyp/util/md5_check.py
|
| index f21e693800ee6fec72c8c9b29dde4e92611e0dba..699d3bfe143b278ef55897b4cae46e38e46071c5 100644
|
| --- a/build/android/gyp/util/md5_check.py
|
| +++ b/build/android/gyp/util/md5_check.py
|
| @@ -4,21 +4,20 @@
|
|
|
| import difflib
|
| import hashlib
|
| +import itertools
|
| +import json
|
| import os
|
| -import re
|
| import sys
|
| +import zipfile
|
|
|
|
|
| # When set and a difference is detected, a diff of what changed is printed.
|
| _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0))
|
|
|
| -# Used to strip off temp dir prefix.
|
| -_TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/')
|
| -
|
|
|
| def CallAndRecordIfStale(
|
| function, record_path=None, input_paths=None, input_strings=None,
|
| - output_paths=None, force=False):
|
| + output_paths=None, force=False, pass_changes=False):
|
| """Calls function if outputs are stale.
|
|
|
| Outputs are considered stale if:
|
| @@ -36,34 +35,317 @@ def CallAndRecordIfStale(
|
| input_paths: List of paths to calcualte an md5 sum on.
|
| input_strings: List of strings to record verbatim.
|
| output_paths: List of output paths.
|
| - force: When True, function is always called.
|
| + force: Whether to treat outputs as missing regardless of whether they
|
| + actually are.
|
| + pass_changes: Whether to pass a Changes instance to |function|.
|
| """
|
| assert record_path or output_paths
|
| input_paths = input_paths or []
|
| input_strings = input_strings or []
|
| output_paths = output_paths or []
|
| record_path = record_path or output_paths[0] + '.md5.stamp'
|
| - md5_checker = _Md5Checker(
|
| - record_path=record_path,
|
| - input_paths=input_paths,
|
| - input_strings=input_strings)
|
| -
|
| - missing_outputs = [x for x in output_paths if not os.path.exists(x)]
|
| - is_stale = md5_checker.old_digest != md5_checker.new_digest
|
| -
|
| - if force or missing_outputs or is_stale:
|
| - if _PRINT_MD5_DIFFS:
|
| - print '=' * 80
|
| - print 'Difference found in %s:' % record_path
|
| - if missing_outputs:
|
| - print 'Outputs do not exist:\n' + '\n'.join(missing_outputs)
|
| - elif force:
|
| - print 'force=True'
|
| - else:
|
| - print md5_checker.DescribeDifference()
|
| - print '=' * 80
|
| - function()
|
| - md5_checker.Write()
|
| +
|
| + assert record_path.endswith('.stamp'), (
|
| + 'record paths must end in \'.stamp\' so that they are easy to find '
|
| + 'and delete')
|
| +
|
| + new_metadata = _Metadata()
|
| + new_metadata.AddStrings(input_strings)
|
| +
|
| + for path in input_paths:
|
| + if _IsZipFile(path):
|
| + entries = _ExtractZipEntries(path)
|
| + new_metadata.AddZipFile(path, entries)
|
| + else:
|
| + new_metadata.AddFile(path, _Md5ForPath(path))
|
| +
|
| + old_metadata = None
|
| + missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
|
| + # When outputs are missing, don't bother gathering change information.
|
| + if not missing_outputs and os.path.exists(record_path):
|
| + with open(record_path, 'r') as jsonfile:
|
| + try:
|
| + old_metadata = _Metadata.FromFile(jsonfile)
|
| + except: # pylint: disable=bare-except
|
| + pass # Not yet using new file format.
|
| +
|
| + changes = Changes(old_metadata, new_metadata, force, missing_outputs)
|
| + if not changes.HasChanges():
|
| + return
|
| +
|
| + if _PRINT_MD5_DIFFS:
|
| + print '=' * 80
|
| + print 'Target is stale: %s' % record_path
|
| + print changes.DescribeDifference()
|
| + print '=' * 80
|
| +
|
| +  # Delete the old metadata beforehand since failures leave it in an
|
| +  # indeterminate state.
|
| + if old_metadata:
|
| + os.unlink(record_path)
|
| +
|
| + args = (changes,) if pass_changes else ()
|
| + function(*args)
|
| +
|
| + with open(record_path, 'w') as f:
|
| + new_metadata.ToFile(f)
|
| +
|
| +
|
| +class Changes(object):
|
| +  """Provides an API for querying what changed between runs."""
|
| +
|
| + def __init__(self, old_metadata, new_metadata, force, missing_outputs):
|
| + self.old_metadata = old_metadata
|
| + self.new_metadata = new_metadata
|
| + self.force = force
|
| + self.missing_outputs = missing_outputs
|
| +
|
| + def _GetOldTag(self, path, subpath=None):
|
| + return self.old_metadata and self.old_metadata.GetTag(path, subpath)
|
| +
|
| + def HasChanges(self):
|
| + """Returns whether any changes exist."""
|
| + return (self.force or
|
| + not self.old_metadata or
|
| + self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
|
| + self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())
|
| +
|
| + def AddedOrModifiedOnly(self):
|
| + """Returns whether the only changes were from added or modified (sub)files.
|
| +
|
| + No missing outputs, no removed paths/subpaths.
|
| + """
|
| + if (self.force or
|
| + not self.old_metadata or
|
| + self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
|
| + return False
|
| + if any(self.IterRemovedPaths()):
|
| + return False
|
| + for path in self.IterModifiedPaths():
|
| + if any(self.IterRemovedSubpaths(path)):
|
| + return False
|
| + return True
|
| +
|
| + def IterAddedPaths(self):
|
| + """Generator for paths that were added."""
|
| + for path in self.new_metadata.IterPaths():
|
| + if self._GetOldTag(path) is None:
|
| + yield path
|
| +
|
| + def IterAddedSubpaths(self, path):
|
| + """Generator for paths that were added within the given zip file."""
|
| + for subpath in self.new_metadata.IterSubpaths(path):
|
| + if self._GetOldTag(path, subpath) is None:
|
| + yield subpath
|
| +
|
| + def IterRemovedPaths(self):
|
| + """Generator for paths that were removed."""
|
| + if self.old_metadata:
|
| + for path in self.old_metadata.IterPaths():
|
| + if self.new_metadata.GetTag(path) is None:
|
| + yield path
|
| +
|
| + def IterRemovedSubpaths(self, path):
|
| + """Generator for paths that were removed within the given zip file."""
|
| + if self.old_metadata:
|
| + for subpath in self.old_metadata.IterSubpaths(path):
|
| + if self.new_metadata.GetTag(path, subpath) is None:
|
| + yield subpath
|
| +
|
| + def IterModifiedPaths(self):
|
| + """Generator for paths whose contents have changed."""
|
| + for path in self.new_metadata.IterPaths():
|
| + old_tag = self._GetOldTag(path)
|
| + new_tag = self.new_metadata.GetTag(path)
|
| + if old_tag is not None and old_tag != new_tag:
|
| + yield path
|
| +
|
| + def IterModifiedSubpaths(self, path):
|
| + """Generator for paths within a zip file whose contents have changed."""
|
| + for subpath in self.new_metadata.IterSubpaths(path):
|
| + old_tag = self._GetOldTag(path, subpath)
|
| + new_tag = self.new_metadata.GetTag(path, subpath)
|
| + if old_tag is not None and old_tag != new_tag:
|
| + yield subpath
|
| +
|
| + def IterChangedPaths(self):
|
| + """Generator for all changed paths (added/removed/modified)."""
|
| + return itertools.chain(self.IterRemovedPaths(),
|
| + self.IterModifiedPaths(),
|
| + self.IterAddedPaths())
|
| +
|
| + def IterChangedSubpaths(self, path):
|
| + """Generator for paths within a zip that were added/removed/modified."""
|
| + return itertools.chain(self.IterRemovedSubpaths(path),
|
| + self.IterModifiedSubpaths(path),
|
| + self.IterAddedSubpaths(path))
|
| +
|
| + def DescribeDifference(self):
|
| + """Returns a human-readable description of what changed."""
|
| + if self.force:
|
| + return 'force=True'
|
| + elif self.missing_outputs:
|
| + return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
|
| + elif self.old_metadata is None:
|
| + return 'Previous stamp file not found.'
|
| +
|
| + if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
|
| + ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
|
| + self.new_metadata.GetStrings())
|
| + changed = [s for s in ndiff if not s.startswith(' ')]
|
| + return 'Input strings changed:\n ' + '\n '.join(changed)
|
| +
|
| + if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
|
| + return "There's no difference."
|
| +
|
| + lines = []
|
| + lines.extend('Added: ' + p for p in self.IterAddedPaths())
|
| + lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
|
| + for path in self.IterModifiedPaths():
|
| + lines.append('Modified: ' + path)
|
| + lines.extend(' -> Subpath added: ' + p
|
| + for p in self.IterAddedSubpaths(path))
|
| + lines.extend(' -> Subpath removed: ' + p
|
| + for p in self.IterRemovedSubpaths(path))
|
| + lines.extend(' -> Subpath modified: ' + p
|
| + for p in self.IterModifiedSubpaths(path))
|
| + if lines:
|
| + return 'Input files changed:\n ' + '\n '.join(lines)
|
| + return 'I have no idea what changed (there is a bug).'
|
| +
|
| +
|
| +class _Metadata(object):
|
| + """Data model for tracking change metadata."""
|
| + # Schema:
|
| + # {
|
| + # "files-md5": "VALUE",
|
| + # "strings-md5": "VALUE",
|
| + # "input-files": [
|
| + # {
|
| + # "path": "path.jar",
|
| + # "tag": "{MD5 of entries}",
|
| + # "entries": [
|
| + # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
|
| + # ]
|
| + # }, {
|
| + # "path": "path.txt",
|
| + # "tag": "{MD5}",
|
| + # }
|
| + # ],
|
| + # "input-strings": ["a", "b", ...],
|
| + # }
|
| + def __init__(self):
|
| + self._files_md5 = None
|
| + self._strings_md5 = None
|
| + self._files = []
|
| + self._strings = []
|
| + # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
|
| + self._file_map = None
|
| +
|
| + @classmethod
|
| + def FromFile(cls, fileobj):
|
| + """Returns a _Metadata initialized from a file object."""
|
| + ret = cls()
|
| + obj = json.load(fileobj)
|
| + ret._files_md5 = obj['files-md5']
|
| + ret._strings_md5 = obj['strings-md5']
|
| + ret._files = obj['input-files']
|
| + ret._strings = obj['input-strings']
|
| + return ret
|
| +
|
| + def ToFile(self, fileobj):
|
| + """Serializes metadata to the given file object."""
|
| + obj = {
|
| + "files-md5": self.FilesMd5(),
|
| + "strings-md5": self.StringsMd5(),
|
| + "input-files": self._files,
|
| + "input-strings": self._strings,
|
| + }
|
| + json.dump(obj, fileobj, indent=2)
|
| +
|
| + def _AssertNotQueried(self):
|
| + assert self._files_md5 is None
|
| + assert self._strings_md5 is None
|
| + assert self._file_map is None
|
| +
|
| + def AddStrings(self, values):
|
| + self._AssertNotQueried()
|
| + self._strings.extend(str(v) for v in values)
|
| +
|
| + def AddFile(self, path, tag):
|
| + """Adds metadata for a non-zip file.
|
| +
|
| + Args:
|
| + path: Path to the file.
|
| + tag: A short string representative of the file contents.
|
| + """
|
| + self._AssertNotQueried()
|
| + self._files.append({
|
| + 'path': path,
|
| + 'tag': tag,
|
| + })
|
| +
|
| + def AddZipFile(self, path, entries):
|
| + """Adds metadata for a zip file.
|
| +
|
| + Args:
|
| + path: Path to the file.
|
| + entries: List of (subpath, tag) tuples for entries within the zip.
|
| + """
|
| + self._AssertNotQueried()
|
| + tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
|
| + (e[1] for e in entries)))
|
| + self._files.append({
|
| + 'path': path,
|
| + 'tag': tag,
|
| + 'entries': [{"path": e[0], "tag": e[1]} for e in entries],
|
| + })
|
| +
|
| + def GetStrings(self):
|
| + """Returns the list of input strings."""
|
| + return self._strings
|
| +
|
| + def FilesMd5(self):
|
| + """Lazily computes and returns the aggregate md5 of input files."""
|
| + if self._files_md5 is None:
|
| + # Omit paths from md5 since temporary files have random names.
|
| + self._files_md5 = _ComputeInlineMd5(
|
| + self.GetTag(p) for p in sorted(self.IterPaths()))
|
| + return self._files_md5
|
| +
|
| + def StringsMd5(self):
|
| + """Lazily computes and returns the aggregate md5 of input strings."""
|
| + if self._strings_md5 is None:
|
| + self._strings_md5 = _ComputeInlineMd5(self._strings)
|
| + return self._strings_md5
|
| +
|
| + def _GetEntry(self, path, subpath=None):
|
| + """Returns the JSON entry for the given path / subpath."""
|
| + if self._file_map is None:
|
| + self._file_map = {}
|
| + for entry in self._files:
|
| + self._file_map[(entry['path'], None)] = entry
|
| + for subentry in entry.get('entries', ()):
|
| + self._file_map[(entry['path'], subentry['path'])] = subentry
|
| + return self._file_map.get((path, subpath))
|
| +
|
| + def GetTag(self, path, subpath=None):
|
| + """Returns the tag for the given path / subpath."""
|
| + ret = self._GetEntry(path, subpath)
|
| + return ret and ret['tag']
|
| +
|
| + def IterPaths(self):
|
| + """Returns a generator for all top-level paths."""
|
| + return (e['path'] for e in self._files)
|
| +
|
| + def IterSubpaths(self, path):
|
| + """Returns a generator for all subpaths in the given zip.
|
| +
|
| + If the given path is not a zip file, returns an empty generator.
|
| + """
|
| + outer_entry = self._GetEntry(path)
|
| + subentries = outer_entry.get('entries', [])
|
| + return (entry['path'] for entry in subentries)
|
|
|
|
|
| def _UpdateMd5ForFile(md5, path, block_size=2**16):
|
| @@ -81,70 +363,37 @@ def _UpdateMd5ForDirectory(md5, dir_path):
|
| _UpdateMd5ForFile(md5, os.path.join(root, f))
|
|
|
|
|
| -def _UpdateMd5ForPath(md5, path):
|
| +def _Md5ForPath(path):
|
| + md5 = hashlib.md5()
|
| if os.path.isdir(path):
|
| _UpdateMd5ForDirectory(md5, path)
|
| else:
|
| _UpdateMd5ForFile(md5, path)
|
| + return md5.hexdigest()
|
|
|
|
|
| -def _TrimPathPrefix(path):
|
| - """Attempts to remove temp dir prefix from the path.
|
| +def _ComputeInlineMd5(iterable):
|
| + """Computes the md5 of the concatenated parameters."""
|
| + md5 = hashlib.md5()
|
| + for item in iterable:
|
| + md5.update(str(item))
|
| + return md5.hexdigest()
|
|
|
| - Use this only for extended_info (not for the actual md5).
|
| - """
|
| - return _TEMP_DIR_PATTERN.sub('{TMP}', path)
|
| -
|
| -
|
| -class _Md5Checker(object):
|
| - def __init__(self, record_path=None, input_paths=None, input_strings=None):
|
| - if not input_paths:
|
| - input_paths = []
|
| - if not input_strings:
|
| - input_strings = []
|
| -
|
| - assert record_path.endswith('.stamp'), (
|
| - 'record paths must end in \'.stamp\' so that they are easy to find '
|
| - 'and delete')
|
| -
|
| - self.record_path = record_path
|
| -
|
| - extended_info = []
|
| - outer_md5 = hashlib.md5()
|
| - for i in sorted(input_paths):
|
| - inner_md5 = hashlib.md5()
|
| - _UpdateMd5ForPath(inner_md5, i)
|
| - i = _TrimPathPrefix(i)
|
| - extended_info.append(i + '=' + inner_md5.hexdigest())
|
| - # Include the digest in the overall diff, but not the path
|
| - outer_md5.update(inner_md5.hexdigest())
|
| -
|
| - for s in (str(s) for s in input_strings):
|
| - outer_md5.update(s)
|
| - extended_info.append(s)
|
| -
|
| - self.new_digest = outer_md5.hexdigest()
|
| - self.new_extended_info = extended_info
|
| -
|
| - self.old_digest = ''
|
| - self.old_extended_info = []
|
| - if os.path.exists(self.record_path):
|
| - with open(self.record_path, 'r') as old_record:
|
| - self.old_extended_info = [line.strip() for line in old_record]
|
| - if self.old_extended_info:
|
| - self.old_digest = self.old_extended_info.pop(0)
|
| -
|
| - def Write(self):
|
| - with open(self.record_path, 'w') as new_record:
|
| - new_record.write(self.new_digest)
|
| - new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n')
|
|
|
| - def DescribeDifference(self):
|
| - if self.old_digest == self.new_digest:
|
| - return "There's no difference."
|
| - if not self.old_digest:
|
| - return 'Previous stamp file not found.'
|
| - if not self.old_extended_info:
|
| - return 'Previous stamp file lacks extended info.'
|
| - diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info)
|
| - return '\n'.join(diff)
|
| +def _IsZipFile(path):
|
| + """Returns whether to treat the given file as a zip file."""
|
| + # ijar doesn't set the CRC32 field.
|
| + if path.endswith('.interface.jar'):
|
| + return False
|
| + return path[-4:] in ('.zip', '.apk', '.jar') or path.endswith('.srcjar')
|
| +
|
| +
|
| +def _ExtractZipEntries(path):
|
| + """Returns a list of (path, CRC32) of all files within |path|."""
|
| + entries = []
|
| + with zipfile.ZipFile(path) as zip_file:
|
| + for zip_info in zip_file.infolist():
|
| + # Skip directories and empty files.
|
| + if zip_info.CRC:
|
| + entries.append((zip_info.filename, zip_info.CRC))
|
| + return entries
|
|
|