Index: build/android/gyp/util/md5_check.py |
diff --git a/build/android/gyp/util/md5_check.py b/build/android/gyp/util/md5_check.py |
index f21e693800ee6fec72c8c9b29dde4e92611e0dba..699d3bfe143b278ef55897b4cae46e38e46071c5 100644 |
--- a/build/android/gyp/util/md5_check.py |
+++ b/build/android/gyp/util/md5_check.py |
@@ -4,21 +4,20 @@ |
import difflib |
import hashlib |
+import itertools |
+import json |
import os |
-import re |
import sys |
+import zipfile |
# When set and a difference is detected, a diff of what changed is printed. |
_PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) |
-# Used to strip off temp dir prefix. |
-_TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/') |
- |
def CallAndRecordIfStale( |
function, record_path=None, input_paths=None, input_strings=None, |
- output_paths=None, force=False): |
+ output_paths=None, force=False, pass_changes=False): |
"""Calls function if outputs are stale. |
Outputs are considered stale if: |
@@ -36,34 +35,317 @@ def CallAndRecordIfStale( |
input_paths: List of paths to calcualte an md5 sum on. |
input_strings: List of strings to record verbatim. |
output_paths: List of output paths. |
- force: When True, function is always called. |
+ force: Whether to treat outputs as missing regardless of whether they |
+ actually are. |
+ pass_changes: Whether to pass a Changes instance to |function|. |
""" |
assert record_path or output_paths |
input_paths = input_paths or [] |
input_strings = input_strings or [] |
output_paths = output_paths or [] |
record_path = record_path or output_paths[0] + '.md5.stamp' |
- md5_checker = _Md5Checker( |
- record_path=record_path, |
- input_paths=input_paths, |
- input_strings=input_strings) |
- |
- missing_outputs = [x for x in output_paths if not os.path.exists(x)] |
- is_stale = md5_checker.old_digest != md5_checker.new_digest |
- |
- if force or missing_outputs or is_stale: |
- if _PRINT_MD5_DIFFS: |
- print '=' * 80 |
- print 'Difference found in %s:' % record_path |
- if missing_outputs: |
- print 'Outputs do not exist:\n' + '\n'.join(missing_outputs) |
- elif force: |
- print 'force=True' |
- else: |
- print md5_checker.DescribeDifference() |
- print '=' * 80 |
- function() |
- md5_checker.Write() |
+ |
+ assert record_path.endswith('.stamp'), ( |
+ 'record paths must end in \'.stamp\' so that they are easy to find ' |
+ 'and delete') |
+ |
+ new_metadata = _Metadata() |
+ new_metadata.AddStrings(input_strings) |
+ |
+ for path in input_paths: |
+ if _IsZipFile(path): |
+ entries = _ExtractZipEntries(path) |
+ new_metadata.AddZipFile(path, entries) |
+ else: |
+ new_metadata.AddFile(path, _Md5ForPath(path)) |
+ |
+ old_metadata = None |
+ missing_outputs = [x for x in output_paths if force or not os.path.exists(x)] |
+ # When outputs are missing, don't bother gathering change information. |
+ if not missing_outputs and os.path.exists(record_path): |
+ with open(record_path, 'r') as jsonfile: |
+ try: |
+ old_metadata = _Metadata.FromFile(jsonfile) |
+ except: # pylint: disable=bare-except |
+ pass # Not yet using new file format. |
+ |
+ changes = Changes(old_metadata, new_metadata, force, missing_outputs) |
+ if not changes.HasChanges(): |
+ return |
+ |
+ if _PRINT_MD5_DIFFS: |
+ print '=' * 80 |
+ print 'Target is stale: %s' % record_path |
+ print changes.DescribeDifference() |
+ print '=' * 80 |
+ |
+  # Delete the old metadata beforehand since failures leave it in an |
+  # indeterminate state. |
+ if old_metadata: |
+ os.unlink(record_path) |
+ |
+ args = (changes,) if pass_changes else () |
+ function(*args) |
+ |
+ with open(record_path, 'w') as f: |
+ new_metadata.ToFile(f) |
+ |
+ |
+class Changes(object): |
+  """Provides an API for querying what changed between runs.""" |
+ |
+ def __init__(self, old_metadata, new_metadata, force, missing_outputs): |
+ self.old_metadata = old_metadata |
+ self.new_metadata = new_metadata |
+ self.force = force |
+ self.missing_outputs = missing_outputs |
+ |
+ def _GetOldTag(self, path, subpath=None): |
+ return self.old_metadata and self.old_metadata.GetTag(path, subpath) |
+ |
+ def HasChanges(self): |
+ """Returns whether any changes exist.""" |
+ return (self.force or |
+ not self.old_metadata or |
+ self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or |
+ self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5()) |
+ |
+ def AddedOrModifiedOnly(self): |
+ """Returns whether the only changes were from added or modified (sub)files. |
+ |
+ No missing outputs, no removed paths/subpaths. |
+ """ |
+ if (self.force or |
+ not self.old_metadata or |
+ self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()): |
+ return False |
+ if any(self.IterRemovedPaths()): |
+ return False |
+ for path in self.IterModifiedPaths(): |
+ if any(self.IterRemovedSubpaths(path)): |
+ return False |
+ return True |
+ |
+ def IterAddedPaths(self): |
+ """Generator for paths that were added.""" |
+ for path in self.new_metadata.IterPaths(): |
+ if self._GetOldTag(path) is None: |
+ yield path |
+ |
+ def IterAddedSubpaths(self, path): |
+ """Generator for paths that were added within the given zip file.""" |
+ for subpath in self.new_metadata.IterSubpaths(path): |
+ if self._GetOldTag(path, subpath) is None: |
+ yield subpath |
+ |
+ def IterRemovedPaths(self): |
+ """Generator for paths that were removed.""" |
+ if self.old_metadata: |
+ for path in self.old_metadata.IterPaths(): |
+ if self.new_metadata.GetTag(path) is None: |
+ yield path |
+ |
+ def IterRemovedSubpaths(self, path): |
+ """Generator for paths that were removed within the given zip file.""" |
+ if self.old_metadata: |
+ for subpath in self.old_metadata.IterSubpaths(path): |
+ if self.new_metadata.GetTag(path, subpath) is None: |
+ yield subpath |
+ |
+ def IterModifiedPaths(self): |
+ """Generator for paths whose contents have changed.""" |
+ for path in self.new_metadata.IterPaths(): |
+ old_tag = self._GetOldTag(path) |
+ new_tag = self.new_metadata.GetTag(path) |
+ if old_tag is not None and old_tag != new_tag: |
+ yield path |
+ |
+ def IterModifiedSubpaths(self, path): |
+ """Generator for paths within a zip file whose contents have changed.""" |
+ for subpath in self.new_metadata.IterSubpaths(path): |
+ old_tag = self._GetOldTag(path, subpath) |
+ new_tag = self.new_metadata.GetTag(path, subpath) |
+ if old_tag is not None and old_tag != new_tag: |
+ yield subpath |
+ |
+ def IterChangedPaths(self): |
+ """Generator for all changed paths (added/removed/modified).""" |
+ return itertools.chain(self.IterRemovedPaths(), |
+ self.IterModifiedPaths(), |
+ self.IterAddedPaths()) |
+ |
+ def IterChangedSubpaths(self, path): |
+ """Generator for paths within a zip that were added/removed/modified.""" |
+ return itertools.chain(self.IterRemovedSubpaths(path), |
+ self.IterModifiedSubpaths(path), |
+ self.IterAddedSubpaths(path)) |
+ |
+ def DescribeDifference(self): |
+ """Returns a human-readable description of what changed.""" |
+ if self.force: |
+ return 'force=True' |
+ elif self.missing_outputs: |
+ return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs) |
+ elif self.old_metadata is None: |
+ return 'Previous stamp file not found.' |
+ |
+ if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5(): |
+ ndiff = difflib.ndiff(self.old_metadata.GetStrings(), |
+ self.new_metadata.GetStrings()) |
+ changed = [s for s in ndiff if not s.startswith(' ')] |
+ return 'Input strings changed:\n ' + '\n '.join(changed) |
+ |
+ if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5(): |
+ return "There's no difference." |
+ |
+ lines = [] |
+ lines.extend('Added: ' + p for p in self.IterAddedPaths()) |
+ lines.extend('Removed: ' + p for p in self.IterRemovedPaths()) |
+ for path in self.IterModifiedPaths(): |
+ lines.append('Modified: ' + path) |
+ lines.extend(' -> Subpath added: ' + p |
+ for p in self.IterAddedSubpaths(path)) |
+ lines.extend(' -> Subpath removed: ' + p |
+ for p in self.IterRemovedSubpaths(path)) |
+ lines.extend(' -> Subpath modified: ' + p |
+ for p in self.IterModifiedSubpaths(path)) |
+ if lines: |
+ return 'Input files changed:\n ' + '\n '.join(lines) |
+ return 'I have no idea what changed (there is a bug).' |
+ |
+ |
+class _Metadata(object): |
+ """Data model for tracking change metadata.""" |
+ # Schema: |
+ # { |
+ # "files-md5": "VALUE", |
+ # "strings-md5": "VALUE", |
+ # "input-files": [ |
+ # { |
+ # "path": "path.jar", |
+ # "tag": "{MD5 of entries}", |
+ # "entries": [ |
+ # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ... |
+ # ] |
+ # }, { |
+ # "path": "path.txt", |
+ # "tag": "{MD5}", |
+ # } |
+ # ], |
+ # "input-strings": ["a", "b", ...], |
+ # } |
+ def __init__(self): |
+ self._files_md5 = None |
+ self._strings_md5 = None |
+ self._files = [] |
+ self._strings = [] |
+ # Map of (path, subpath) -> entry. Created upon first call to _GetEntry(). |
+ self._file_map = None |
+ |
+ @classmethod |
+ def FromFile(cls, fileobj): |
+ """Returns a _Metadata initialized from a file object.""" |
+ ret = cls() |
+ obj = json.load(fileobj) |
+ ret._files_md5 = obj['files-md5'] |
+ ret._strings_md5 = obj['strings-md5'] |
+ ret._files = obj['input-files'] |
+ ret._strings = obj['input-strings'] |
+ return ret |
+ |
+ def ToFile(self, fileobj): |
+ """Serializes metadata to the given file object.""" |
+ obj = { |
+ "files-md5": self.FilesMd5(), |
+ "strings-md5": self.StringsMd5(), |
+ "input-files": self._files, |
+ "input-strings": self._strings, |
+ } |
+ json.dump(obj, fileobj, indent=2) |
+ |
+ def _AssertNotQueried(self): |
+ assert self._files_md5 is None |
+ assert self._strings_md5 is None |
+ assert self._file_map is None |
+ |
+ def AddStrings(self, values): |
+ self._AssertNotQueried() |
+ self._strings.extend(str(v) for v in values) |
+ |
+ def AddFile(self, path, tag): |
+ """Adds metadata for a non-zip file. |
+ |
+ Args: |
+ path: Path to the file. |
+ tag: A short string representative of the file contents. |
+ """ |
+ self._AssertNotQueried() |
+ self._files.append({ |
+ 'path': path, |
+ 'tag': tag, |
+ }) |
+ |
+ def AddZipFile(self, path, entries): |
+ """Adds metadata for a zip file. |
+ |
+ Args: |
+ path: Path to the file. |
+ entries: List of (subpath, tag) tuples for entries within the zip. |
+ """ |
+ self._AssertNotQueried() |
+ tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries), |
+ (e[1] for e in entries))) |
+ self._files.append({ |
+ 'path': path, |
+ 'tag': tag, |
+ 'entries': [{"path": e[0], "tag": e[1]} for e in entries], |
+ }) |
+ |
+ def GetStrings(self): |
+ """Returns the list of input strings.""" |
+ return self._strings |
+ |
+ def FilesMd5(self): |
+ """Lazily computes and returns the aggregate md5 of input files.""" |
+ if self._files_md5 is None: |
+ # Omit paths from md5 since temporary files have random names. |
+ self._files_md5 = _ComputeInlineMd5( |
+ self.GetTag(p) for p in sorted(self.IterPaths())) |
+ return self._files_md5 |
+ |
+ def StringsMd5(self): |
+ """Lazily computes and returns the aggregate md5 of input strings.""" |
+ if self._strings_md5 is None: |
+ self._strings_md5 = _ComputeInlineMd5(self._strings) |
+ return self._strings_md5 |
+ |
+ def _GetEntry(self, path, subpath=None): |
+ """Returns the JSON entry for the given path / subpath.""" |
+ if self._file_map is None: |
+ self._file_map = {} |
+ for entry in self._files: |
+ self._file_map[(entry['path'], None)] = entry |
+ for subentry in entry.get('entries', ()): |
+ self._file_map[(entry['path'], subentry['path'])] = subentry |
+ return self._file_map.get((path, subpath)) |
+ |
+ def GetTag(self, path, subpath=None): |
+ """Returns the tag for the given path / subpath.""" |
+ ret = self._GetEntry(path, subpath) |
+ return ret and ret['tag'] |
+ |
+ def IterPaths(self): |
+ """Returns a generator for all top-level paths.""" |
+ return (e['path'] for e in self._files) |
+ |
+ def IterSubpaths(self, path): |
+ """Returns a generator for all subpaths in the given zip. |
+ |
+ If the given path is not a zip file, returns an empty generator. |
+ """ |
+ outer_entry = self._GetEntry(path) |
+ subentries = outer_entry.get('entries', []) |
+ return (entry['path'] for entry in subentries) |
def _UpdateMd5ForFile(md5, path, block_size=2**16): |
@@ -81,70 +363,37 @@ def _UpdateMd5ForDirectory(md5, dir_path): |
_UpdateMd5ForFile(md5, os.path.join(root, f)) |
-def _UpdateMd5ForPath(md5, path): |
+def _Md5ForPath(path): |
+ md5 = hashlib.md5() |
if os.path.isdir(path): |
_UpdateMd5ForDirectory(md5, path) |
else: |
_UpdateMd5ForFile(md5, path) |
+ return md5.hexdigest() |
-def _TrimPathPrefix(path): |
- """Attempts to remove temp dir prefix from the path. |
+def _ComputeInlineMd5(iterable): |
+ """Computes the md5 of the concatenated parameters.""" |
+ md5 = hashlib.md5() |
+ for item in iterable: |
+ md5.update(str(item)) |
+ return md5.hexdigest() |
- Use this only for extended_info (not for the actual md5). |
- """ |
- return _TEMP_DIR_PATTERN.sub('{TMP}', path) |
- |
- |
-class _Md5Checker(object): |
- def __init__(self, record_path=None, input_paths=None, input_strings=None): |
- if not input_paths: |
- input_paths = [] |
- if not input_strings: |
- input_strings = [] |
- |
- assert record_path.endswith('.stamp'), ( |
- 'record paths must end in \'.stamp\' so that they are easy to find ' |
- 'and delete') |
- |
- self.record_path = record_path |
- |
- extended_info = [] |
- outer_md5 = hashlib.md5() |
- for i in sorted(input_paths): |
- inner_md5 = hashlib.md5() |
- _UpdateMd5ForPath(inner_md5, i) |
- i = _TrimPathPrefix(i) |
- extended_info.append(i + '=' + inner_md5.hexdigest()) |
- # Include the digest in the overall diff, but not the path |
- outer_md5.update(inner_md5.hexdigest()) |
- |
- for s in (str(s) for s in input_strings): |
- outer_md5.update(s) |
- extended_info.append(s) |
- |
- self.new_digest = outer_md5.hexdigest() |
- self.new_extended_info = extended_info |
- |
- self.old_digest = '' |
- self.old_extended_info = [] |
- if os.path.exists(self.record_path): |
- with open(self.record_path, 'r') as old_record: |
- self.old_extended_info = [line.strip() for line in old_record] |
- if self.old_extended_info: |
- self.old_digest = self.old_extended_info.pop(0) |
- |
- def Write(self): |
- with open(self.record_path, 'w') as new_record: |
- new_record.write(self.new_digest) |
- new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n') |
- def DescribeDifference(self): |
- if self.old_digest == self.new_digest: |
- return "There's no difference." |
- if not self.old_digest: |
- return 'Previous stamp file not found.' |
- if not self.old_extended_info: |
- return 'Previous stamp file lacks extended info.' |
- diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info) |
- return '\n'.join(diff) |
+def _IsZipFile(path): |
+ """Returns whether to treat the given file as a zip file.""" |
+ # ijar doesn't set the CRC32 field. |
+ if path.endswith('.interface.jar'): |
+ return False |
+ return path[-4:] in ('.zip', '.apk', '.jar') or path.endswith('.srcjar') |
+ |
+ |
+def _ExtractZipEntries(path): |
+ """Returns a list of (path, CRC32) of all files within |path|.""" |
+ entries = [] |
+ with zipfile.ZipFile(path) as zip_file: |
+ for zip_info in zip_file.infolist(): |
+ # Skip directories and empty files. |
+ if zip_info.CRC: |
+ entries.append((zip_info.filename, zip_info.CRC)) |
+ return entries |