Chromium Code Reviews

Unified Diff: build/android/gyp/util/md5_check.py

Issue 1361733002: Make javac invocations incremental when possible (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@apkbuilder
Patch Set: add flag and disable by default (created 5 years, 3 months ago)
Index: build/android/gyp/util/md5_check.py
diff --git a/build/android/gyp/util/md5_check.py b/build/android/gyp/util/md5_check.py
index f21e693800ee6fec72c8c9b29dde4e92611e0dba..699d3bfe143b278ef55897b4cae46e38e46071c5 100644
--- a/build/android/gyp/util/md5_check.py
+++ b/build/android/gyp/util/md5_check.py
@@ -4,21 +4,20 @@
import difflib
import hashlib
+import itertools
+import json
import os
-import re
import sys
+import zipfile
# When set and a difference is detected, a diff of what changed is printed.
_PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0))
-# Used to strip off temp dir prefix.
-_TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/')
-
def CallAndRecordIfStale(
function, record_path=None, input_paths=None, input_strings=None,
- output_paths=None, force=False):
+ output_paths=None, force=False, pass_changes=False):
"""Calls function if outputs are stale.
Outputs are considered stale if:
@@ -36,34 +35,317 @@ def CallAndRecordIfStale(
input_paths: List of paths to calculate an md5 sum on.
input_strings: List of strings to record verbatim.
output_paths: List of output paths.
- force: When True, function is always called.
+ force: Whether to treat outputs as missing regardless of whether they
+ actually are.
+ pass_changes: Whether to pass a Changes instance to |function|.
"""
assert record_path or output_paths
input_paths = input_paths or []
input_strings = input_strings or []
output_paths = output_paths or []
record_path = record_path or output_paths[0] + '.md5.stamp'
- md5_checker = _Md5Checker(
- record_path=record_path,
- input_paths=input_paths,
- input_strings=input_strings)
-
- missing_outputs = [x for x in output_paths if not os.path.exists(x)]
- is_stale = md5_checker.old_digest != md5_checker.new_digest
-
- if force or missing_outputs or is_stale:
- if _PRINT_MD5_DIFFS:
- print '=' * 80
- print 'Difference found in %s:' % record_path
- if missing_outputs:
- print 'Outputs do not exist:\n' + '\n'.join(missing_outputs)
- elif force:
- print 'force=True'
- else:
- print md5_checker.DescribeDifference()
- print '=' * 80
- function()
- md5_checker.Write()
+
+ assert record_path.endswith('.stamp'), (
+ 'record paths must end in \'.stamp\' so that they are easy to find '
+ 'and delete')
+
+ new_metadata = _Metadata()
+ new_metadata.AddStrings(input_strings)
+
+ for path in input_paths:
+ if _IsZipFile(path):
+ entries = _ExtractZipEntries(path)
+ new_metadata.AddZipFile(path, entries)
+ else:
+ new_metadata.AddFile(path, _Md5ForPath(path))
+
+ old_metadata = None
+ missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
+ # When outputs are missing, don't bother gathering change information.
+ if not missing_outputs and os.path.exists(record_path):
+ with open(record_path, 'r') as jsonfile:
+ try:
+ old_metadata = _Metadata.FromFile(jsonfile)
+ except: # pylint: disable=bare-except
+ pass # Not yet using new file format.
+
+ changes = Changes(old_metadata, new_metadata, force, missing_outputs)
+ if not changes.HasChanges():
+ return
+
+ if _PRINT_MD5_DIFFS:
+ print '=' * 80
+ print 'Target is stale: %s' % record_path
+ print changes.DescribeDifference()
+ print '=' * 80
+
+ # Delete the old metadata beforehand since failures leave it in an
+ # indeterminate state.
+ if old_metadata:
+ os.unlink(record_path)
+
+ args = (changes,) if pass_changes else ()
+ function(*args)
+
+ with open(record_path, 'w') as f:
+ new_metadata.ToFile(f)
+
+
+class Changes(object):
+ """Provides and API for querying what changed between runs."""
+
+ def __init__(self, old_metadata, new_metadata, force, missing_outputs):
+ self.old_metadata = old_metadata
+ self.new_metadata = new_metadata
+ self.force = force
+ self.missing_outputs = missing_outputs
+
+ def _GetOldTag(self, path, subpath=None):
+ return self.old_metadata and self.old_metadata.GetTag(path, subpath)
+
+ def HasChanges(self):
+ """Returns whether any changes exist."""
+ return (self.force or
+ not self.old_metadata or
+ self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
+ self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())
+
+ def AddedOrModifiedOnly(self):
+ """Returns whether the only changes were from added or modified (sub)files.
+
+ No missing outputs, no removed paths/subpaths.
+ """
+ if (self.force or
+ not self.old_metadata or
+ self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
+ return False
+ if any(self.IterRemovedPaths()):
+ return False
+ for path in self.IterModifiedPaths():
+ if any(self.IterRemovedSubpaths(path)):
+ return False
+ return True
+
+ def IterAddedPaths(self):
+ """Generator for paths that were added."""
+ for path in self.new_metadata.IterPaths():
+ if self._GetOldTag(path) is None:
+ yield path
+
+ def IterAddedSubpaths(self, path):
+ """Generator for paths that were added within the given zip file."""
+ for subpath in self.new_metadata.IterSubpaths(path):
+ if self._GetOldTag(path, subpath) is None:
+ yield subpath
+
+ def IterRemovedPaths(self):
+ """Generator for paths that were removed."""
+ if self.old_metadata:
+ for path in self.old_metadata.IterPaths():
+ if self.new_metadata.GetTag(path) is None:
+ yield path
+
+ def IterRemovedSubpaths(self, path):
+ """Generator for paths that were removed within the given zip file."""
+ if self.old_metadata:
+ for subpath in self.old_metadata.IterSubpaths(path):
+ if self.new_metadata.GetTag(path, subpath) is None:
+ yield subpath
+
+ def IterModifiedPaths(self):
+ """Generator for paths whose contents have changed."""
+ for path in self.new_metadata.IterPaths():
+ old_tag = self._GetOldTag(path)
+ new_tag = self.new_metadata.GetTag(path)
+ if old_tag is not None and old_tag != new_tag:
+ yield path
+
+ def IterModifiedSubpaths(self, path):
+ """Generator for paths within a zip file whose contents have changed."""
+ for subpath in self.new_metadata.IterSubpaths(path):
+ old_tag = self._GetOldTag(path, subpath)
+ new_tag = self.new_metadata.GetTag(path, subpath)
+ if old_tag is not None and old_tag != new_tag:
+ yield subpath
+
+ def IterChangedPaths(self):
+ """Generator for all changed paths (added/removed/modified)."""
+ return itertools.chain(self.IterRemovedPaths(),
+ self.IterModifiedPaths(),
+ self.IterAddedPaths())
+
+ def IterChangedSubpaths(self, path):
+ """Generator for paths within a zip that were added/removed/modified."""
+ return itertools.chain(self.IterRemovedSubpaths(path),
+ self.IterModifiedSubpaths(path),
+ self.IterAddedSubpaths(path))
+
+ def DescribeDifference(self):
+ """Returns a human-readable description of what changed."""
+ if self.force:
+ return 'force=True'
+ elif self.missing_outputs:
+ return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
+ elif self.old_metadata is None:
+ return 'Previous stamp file not found.'
+
+ if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
+ ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
+ self.new_metadata.GetStrings())
+ changed = [s for s in ndiff if not s.startswith(' ')]
+ return 'Input strings changed:\n ' + '\n '.join(changed)
+
+ if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
+ return "There's no difference."
+
+ lines = []
+ lines.extend('Added: ' + p for p in self.IterAddedPaths())
+ lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
+ for path in self.IterModifiedPaths():
+ lines.append('Modified: ' + path)
+ lines.extend(' -> Subpath added: ' + p
+ for p in self.IterAddedSubpaths(path))
+ lines.extend(' -> Subpath removed: ' + p
+ for p in self.IterRemovedSubpaths(path))
+ lines.extend(' -> Subpath modified: ' + p
+ for p in self.IterModifiedSubpaths(path))
+ if lines:
+ return 'Input files changed:\n ' + '\n '.join(lines)
+ return 'I have no idea what changed (there is a bug).'
+
+
+class _Metadata(object):
+ """Data model for tracking change metadata."""
+ # Schema:
+ # {
+ # "files-md5": "VALUE",
+ # "strings-md5": "VALUE",
+ # "input-files": [
+ # {
+ # "path": "path.jar",
+ # "tag": "{MD5 of entries}",
+ # "entries": [
+ # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
+ # ]
+ # }, {
+ # "path": "path.txt",
+ # "tag": "{MD5}",
+ # }
+ # ],
+ # "input-strings": ["a", "b", ...],
+ # }
+ def __init__(self):
+ self._files_md5 = None
+ self._strings_md5 = None
+ self._files = []
+ self._strings = []
+ # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
+ self._file_map = None
+
+ @classmethod
+ def FromFile(cls, fileobj):
+ """Returns a _Metadata initialized from a file object."""
+ ret = cls()
+ obj = json.load(fileobj)
+ ret._files_md5 = obj['files-md5']
+ ret._strings_md5 = obj['strings-md5']
+ ret._files = obj['input-files']
+ ret._strings = obj['input-strings']
+ return ret
+
+ def ToFile(self, fileobj):
+ """Serializes metadata to the given file object."""
+ obj = {
+ "files-md5": self.FilesMd5(),
+ "strings-md5": self.StringsMd5(),
+ "input-files": self._files,
+ "input-strings": self._strings,
+ }
+ json.dump(obj, fileobj, indent=2)
+
+ def _AssertNotQueried(self):
+ assert self._files_md5 is None
+ assert self._strings_md5 is None
+ assert self._file_map is None
+
+ def AddStrings(self, values):
+ self._AssertNotQueried()
+ self._strings.extend(str(v) for v in values)
+
+ def AddFile(self, path, tag):
+ """Adds metadata for a non-zip file.
+
+ Args:
+ path: Path to the file.
+ tag: A short string representative of the file contents.
+ """
+ self._AssertNotQueried()
+ self._files.append({
+ 'path': path,
+ 'tag': tag,
+ })
+
+ def AddZipFile(self, path, entries):
+ """Adds metadata for a zip file.
+
+ Args:
+ path: Path to the file.
+ entries: List of (subpath, tag) tuples for entries within the zip.
+ """
+ self._AssertNotQueried()
+ tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
+ (e[1] for e in entries)))
+ self._files.append({
+ 'path': path,
+ 'tag': tag,
+ 'entries': [{"path": e[0], "tag": e[1]} for e in entries],
+ })
+
+ def GetStrings(self):
+ """Returns the list of input strings."""
+ return self._strings
+
+ def FilesMd5(self):
+ """Lazily computes and returns the aggregate md5 of input files."""
+ if self._files_md5 is None:
+ # Omit paths from md5 since temporary files have random names.
+ self._files_md5 = _ComputeInlineMd5(
+ self.GetTag(p) for p in sorted(self.IterPaths()))
+ return self._files_md5
+
+ def StringsMd5(self):
+ """Lazily computes and returns the aggregate md5 of input strings."""
+ if self._strings_md5 is None:
+ self._strings_md5 = _ComputeInlineMd5(self._strings)
+ return self._strings_md5
+
+ def _GetEntry(self, path, subpath=None):
+ """Returns the JSON entry for the given path / subpath."""
+ if self._file_map is None:
+ self._file_map = {}
+ for entry in self._files:
+ self._file_map[(entry['path'], None)] = entry
+ for subentry in entry.get('entries', ()):
+ self._file_map[(entry['path'], subentry['path'])] = subentry
+ return self._file_map.get((path, subpath))
+
+ def GetTag(self, path, subpath=None):
+ """Returns the tag for the given path / subpath."""
+ ret = self._GetEntry(path, subpath)
+ return ret and ret['tag']
+
+ def IterPaths(self):
+ """Returns a generator for all top-level paths."""
+ return (e['path'] for e in self._files)
+
+ def IterSubpaths(self, path):
+ """Returns a generator for all subpaths in the given zip.
+
+ If the given path is not a zip file, returns an empty generator.
+ """
+ outer_entry = self._GetEntry(path)
+ subentries = outer_entry.get('entries', [])
+ return (entry['path'] for entry in subentries)
def _UpdateMd5ForFile(md5, path, block_size=2**16):
@@ -81,70 +363,37 @@ def _UpdateMd5ForDirectory(md5, dir_path):
_UpdateMd5ForFile(md5, os.path.join(root, f))
-def _UpdateMd5ForPath(md5, path):
+def _Md5ForPath(path):
+ md5 = hashlib.md5()
if os.path.isdir(path):
_UpdateMd5ForDirectory(md5, path)
else:
_UpdateMd5ForFile(md5, path)
+ return md5.hexdigest()
-def _TrimPathPrefix(path):
- """Attempts to remove temp dir prefix from the path.
+def _ComputeInlineMd5(iterable):
+ """Computes the md5 of the concatenated parameters."""
+ md5 = hashlib.md5()
+ for item in iterable:
+ md5.update(str(item))
+ return md5.hexdigest()
- Use this only for extended_info (not for the actual md5).
- """
- return _TEMP_DIR_PATTERN.sub('{TMP}', path)
-
-
-class _Md5Checker(object):
- def __init__(self, record_path=None, input_paths=None, input_strings=None):
- if not input_paths:
- input_paths = []
- if not input_strings:
- input_strings = []
-
- assert record_path.endswith('.stamp'), (
- 'record paths must end in \'.stamp\' so that they are easy to find '
- 'and delete')
-
- self.record_path = record_path
-
- extended_info = []
- outer_md5 = hashlib.md5()
- for i in sorted(input_paths):
- inner_md5 = hashlib.md5()
- _UpdateMd5ForPath(inner_md5, i)
- i = _TrimPathPrefix(i)
- extended_info.append(i + '=' + inner_md5.hexdigest())
- # Include the digest in the overall diff, but not the path
- outer_md5.update(inner_md5.hexdigest())
-
- for s in (str(s) for s in input_strings):
- outer_md5.update(s)
- extended_info.append(s)
-
- self.new_digest = outer_md5.hexdigest()
- self.new_extended_info = extended_info
-
- self.old_digest = ''
- self.old_extended_info = []
- if os.path.exists(self.record_path):
- with open(self.record_path, 'r') as old_record:
- self.old_extended_info = [line.strip() for line in old_record]
- if self.old_extended_info:
- self.old_digest = self.old_extended_info.pop(0)
-
- def Write(self):
- with open(self.record_path, 'w') as new_record:
- new_record.write(self.new_digest)
- new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n')
- def DescribeDifference(self):
- if self.old_digest == self.new_digest:
- return "There's no difference."
- if not self.old_digest:
- return 'Previous stamp file not found.'
- if not self.old_extended_info:
- return 'Previous stamp file lacks extended info.'
- diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info)
- return '\n'.join(diff)
+def _IsZipFile(path):
+ """Returns whether to treat the given file as a zip file."""
+ # ijar doesn't set the CRC32 field.
+ if path.endswith('.interface.jar'):
+ return False
+ return path[-4:] in ('.zip', '.apk', '.jar') or path.endswith('.srcjar')
+
+
+def _ExtractZipEntries(path):
+ """Returns a list of (path, CRC32) of all files within |path|."""
+ entries = []
+ with zipfile.ZipFile(path) as zip_file:
+ for zip_info in zip_file.infolist():
+ # Skip directories and empty files.
+ if zip_info.CRC:
+ entries.append((zip_info.filename, zip_info.CRC))
+ return entries
