Chromium Code Reviews

Side by Side Diff: build/android/gyp/util/md5_check.py

Issue 1361733002: Make javac invocations incremental when possible (Closed) | Base URL: https://chromium.googlesource.com/chromium/src.git@apkbuilder
Patch Set: add flag and disable by default | Created 5 years, 2 months ago
1 # Copyright 2013 The Chromium Authors. All rights reserved. 1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import difflib 5 import difflib
6 import hashlib 6 import hashlib
7 import itertools
8 import json
7 import os 9 import os
8 import re
9 import sys 10 import sys
11 import zipfile
10 12
11 13
12 # When set and a difference is detected, a diff of what changed is printed. 14 # When set and a difference is detected, a diff of what changed is printed.
13 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) 15 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0))
14 16
15 # Used to strip off temp dir prefix.
16 _TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/')
17
18 17
19 def CallAndRecordIfStale( 18 def CallAndRecordIfStale(
20 function, record_path=None, input_paths=None, input_strings=None, 19 function, record_path=None, input_paths=None, input_strings=None,
21 output_paths=None, force=False): 20 output_paths=None, force=False, pass_changes=False):
22 """Calls function if outputs are stale. 21 """Calls function if outputs are stale.
23 22
24 Outputs are considered stale if: 23 Outputs are considered stale if:
25 - any output_paths are missing, or 24 - any output_paths are missing, or
26 - the contents of any file within input_paths has changed, or 25 - the contents of any file within input_paths has changed, or
27 - the contents of input_strings has changed. 26 - the contents of input_strings has changed.
28 27
29 To debug which files are out-of-date, set the environment variable: 28 To debug which files are out-of-date, set the environment variable:
30 PRINT_MD5_DIFFS=1 29 PRINT_MD5_DIFFS=1
31 30
32 Args: 31 Args:
33 function: The function to call. 32 function: The function to call.
34 record_path: Path to record metadata. 33 record_path: Path to record metadata.
35 Defaults to output_paths[0] + '.md5.stamp' 34 Defaults to output_paths[0] + '.md5.stamp'
36 input_paths: List of paths to calculate an md5 sum on. 35 input_paths: List of paths to calculate an md5 sum on.
37 input_strings: List of strings to record verbatim. 36 input_strings: List of strings to record verbatim.
38 output_paths: List of output paths. 37 output_paths: List of output paths.
39 force: When True, function is always called. 38 force: Whether to treat outputs as missing regardless of whether they
39 actually are.
40 pass_changes: Whether to pass a Changes instance to |function|.
40 """ 41 """
41 assert record_path or output_paths 42 assert record_path or output_paths
42 input_paths = input_paths or [] 43 input_paths = input_paths or []
43 input_strings = input_strings or [] 44 input_strings = input_strings or []
44 output_paths = output_paths or [] 45 output_paths = output_paths or []
45 record_path = record_path or output_paths[0] + '.md5.stamp' 46 record_path = record_path or output_paths[0] + '.md5.stamp'
46 md5_checker = _Md5Checker( 47
47 record_path=record_path, 48 assert record_path.endswith('.stamp'), (
48 input_paths=input_paths, 49 'record paths must end in \'.stamp\' so that they are easy to find '
49 input_strings=input_strings) 50 'and delete')
50 51
51 missing_outputs = [x for x in output_paths if not os.path.exists(x)] 52 new_metadata = _Metadata()
52 is_stale = md5_checker.old_digest != md5_checker.new_digest 53 new_metadata.AddStrings(input_strings)
53 54
54 if force or missing_outputs or is_stale: 55 for path in input_paths:
55 if _PRINT_MD5_DIFFS: 56 if _IsZipFile(path):
56 print '=' * 80 57 entries = _ExtractZipEntries(path)
57 print 'Difference found in %s:' % record_path 58 new_metadata.AddZipFile(path, entries)
58 if missing_outputs: 59 else:
59 print 'Outputs do not exist:\n' + '\n'.join(missing_outputs) 60 new_metadata.AddFile(path, _Md5ForPath(path))
60 elif force: 61
61 print 'force=True' 62 old_metadata = None
62 else: 63 missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
63 print md5_checker.DescribeDifference() 64 # When outputs are missing, don't bother gathering change information.
64 print '=' * 80 65 if not missing_outputs and os.path.exists(record_path):
65 function() 66 with open(record_path, 'r') as jsonfile:
66 md5_checker.Write() 67 try:
68 old_metadata = _Metadata.FromFile(jsonfile)
69 except: # pylint: disable=bare-except
70 pass # Not yet using new file format.
71
72 changes = Changes(old_metadata, new_metadata, force, missing_outputs)
73 if not changes.HasChanges():
74 return
75
76 if _PRINT_MD5_DIFFS:
77 print '=' * 80
78 print 'Target is stale: %s' % record_path
79 print changes.DescribeDifference()
80 print '=' * 80
81
82 # Delete the old metadata beforehand since failures leave it in an
83 # indeterminate state.
84 if old_metadata:
85 os.unlink(record_path)
86
87 args = (changes,) if pass_changes else ()
88 function(*args)
89
90 with open(record_path, 'w') as f:
91 new_metadata.ToFile(f)
92
93
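The calling pattern for build scripts is unchanged when pass_changes is left off: wrap the real work in a closure and let CallAndRecordIfStale decide whether it needs to run. A minimal usage sketch follows, assuming hypothetical input/output paths and a stand-in work function (none of these names come from this CL):

import md5_check  # build/android/gyp/util/md5_check.py

def _Concatenate(input_paths, output_path):
  # Stand-in for the real work (e.g. invoking javac or merging zips).
  with open(output_path, 'w') as out:
    for path in input_paths:
      with open(path) as f:
        out.write(f.read())

def main():
  input_paths = ['a.txt', 'b.txt']    # hypothetical inputs
  output_path = 'out/combined.txt'    # hypothetical output
  md5_check.CallAndRecordIfStale(
      lambda: _Concatenate(input_paths, output_path),
      input_paths=input_paths,
      input_strings=['concat-v1'],    # change this string to force a re-run
      output_paths=[output_path])
  # The record defaults to out/combined.txt.md5.stamp.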
94 class Changes(object):
95 """Provides and API for querying what changed between runs."""
96
97 def __init__(self, old_metadata, new_metadata, force, missing_outputs):
98 self.old_metadata = old_metadata
99 self.new_metadata = new_metadata
100 self.force = force
101 self.missing_outputs = missing_outputs
102
103 def _GetOldTag(self, path, subpath=None):
104 return self.old_metadata and self.old_metadata.GetTag(path, subpath)
105
106 def HasChanges(self):
107 """Returns whether any changes exist."""
108 return (self.force or
109 not self.old_metadata or
110 self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
111 self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())
112
113 def AddedOrModifiedOnly(self):
114 """Returns whether the only changes were from added or modified (sub)files.
115
116 No missing outputs, no removed paths/subpaths.
117 """
118 if (self.force or
119 not self.old_metadata or
120 self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
121 return False
122 if any(self.IterRemovedPaths()):
123 return False
124 for path in self.IterModifiedPaths():
125 if any(self.IterRemovedSubpaths(path)):
126 return False
127 return True
128
129 def IterAddedPaths(self):
130 """Generator for paths that were added."""
131 for path in self.new_metadata.IterPaths():
132 if self._GetOldTag(path) is None:
133 yield path
134
135 def IterAddedSubpaths(self, path):
136 """Generator for paths that were added within the given zip file."""
137 for subpath in self.new_metadata.IterSubpaths(path):
138 if self._GetOldTag(path, subpath) is None:
139 yield subpath
140
141 def IterRemovedPaths(self):
142 """Generator for paths that were removed."""
143 if self.old_metadata:
144 for path in self.old_metadata.IterPaths():
145 if self.new_metadata.GetTag(path) is None:
146 yield path
147
148 def IterRemovedSubpaths(self, path):
149 """Generator for paths that were removed within the given zip file."""
150 if self.old_metadata:
151 for subpath in self.old_metadata.IterSubpaths(path):
152 if self.new_metadata.GetTag(path, subpath) is None:
153 yield subpath
154
155 def IterModifiedPaths(self):
156 """Generator for paths whose contents have changed."""
157 for path in self.new_metadata.IterPaths():
158 old_tag = self._GetOldTag(path)
159 new_tag = self.new_metadata.GetTag(path)
160 if old_tag is not None and old_tag != new_tag:
161 yield path
162
163 def IterModifiedSubpaths(self, path):
164 """Generator for paths within a zip file whose contents have changed."""
165 for subpath in self.new_metadata.IterSubpaths(path):
166 old_tag = self._GetOldTag(path, subpath)
167 new_tag = self.new_metadata.GetTag(path, subpath)
168 if old_tag is not None and old_tag != new_tag:
169 yield subpath
170
171 def IterChangedPaths(self):
172 """Generator for all changed paths (added/removed/modified)."""
173 return itertools.chain(self.IterRemovedPaths(),
174 self.IterModifiedPaths(),
175 self.IterAddedPaths())
176
177 def IterChangedSubpaths(self, path):
178 """Generator for paths within a zip that were added/removed/modified."""
179 return itertools.chain(self.IterRemovedSubpaths(path),
180 self.IterModifiedSubpaths(path),
181 self.IterAddedSubpaths(path))
182
183 def DescribeDifference(self):
184 """Returns a human-readable description of what changed."""
185 if self.force:
186 return 'force=True'
187 elif self.missing_outputs:
188 return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
189 elif self.old_metadata is None:
190 return 'Previous stamp file not found.'
191
192 if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
193 ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
194 self.new_metadata.GetStrings())
195 changed = [s for s in ndiff if not s.startswith(' ')]
196 return 'Input strings changed:\n ' + '\n '.join(changed)
197
198 if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
199 return "There's no difference."
200
201 lines = []
202 lines.extend('Added: ' + p for p in self.IterAddedPaths())
203 lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
204 for path in self.IterModifiedPaths():
205 lines.append('Modified: ' + path)
206 lines.extend(' -> Subpath added: ' + p
207 for p in self.IterAddedSubpaths(path))
208 lines.extend(' -> Subpath removed: ' + p
209 for p in self.IterRemovedSubpaths(path))
210 lines.extend(' -> Subpath modified: ' + p
211 for p in self.IterModifiedSubpaths(path))
212 if lines:
213 return 'Input files changed:\n ' + '\n '.join(lines)
214 return 'I have no idea what changed (there is a bug).'
215
216
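With pass_changes=True the callback receives this Changes instance, which is what lets a javac wrapper recompile only what it must. A hedged sketch of that flow, with made-up helper names, flags, and paths rather than the actual logic from the javac change:

import md5_check  # build/android/gyp/util/md5_check.py

def _RunCompile(srcs):
  # Stand-in for the real compile step.
  print 'compiling: %s' % (srcs if srcs is not None else 'everything')

def _OnStaleMd5(changes):
  if changes.AddedOrModifiedOnly():
    # Nothing was removed and no outputs are missing, so acting on only the
    # changed (sub)paths is safe.
    srcs = []
    for path in changes.IterChangedPaths():
      srcs.extend(changes.IterChangedSubpaths(path))
    _RunCompile(srcs)
  else:
    _RunCompile(None)  # Fall back to a full rebuild.

md5_check.CallAndRecordIfStale(
    _OnStaleMd5,
    input_paths=['srcs.srcjar'],        # hypothetical zip input
    input_strings=['-g'],               # hypothetical compile flags
    output_paths=['out/classes.jar'],   # hypothetical output
    pass_changes=True)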
217 class _Metadata(object):
218 """Data model for tracking change metadata."""
219 # Schema:
220 # {
221 # "files-md5": "VALUE",
222 # "strings-md5": "VALUE",
223 # "input-files": [
224 # {
225 # "path": "path.jar",
226 # "tag": "{MD5 of entries}",
227 # "entries": [
228 # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
229 # ]
230 # }, {
231 # "path": "path.txt",
232 # "tag": "{MD5}",
233 # }
234 # ],
235 # "input-strings": ["a", "b", ...],
236 # }
237 def __init__(self):
238 self._files_md5 = None
239 self._strings_md5 = None
240 self._files = []
241 self._strings = []
242 # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
243 self._file_map = None
244
245 @classmethod
246 def FromFile(cls, fileobj):
247 """Returns a _Metadata initialized from a file object."""
248 ret = cls()
249 obj = json.load(fileobj)
250 ret._files_md5 = obj['files-md5']
251 ret._strings_md5 = obj['strings-md5']
252 ret._files = obj['input-files']
253 ret._strings = obj['input-strings']
254 return ret
255
256 def ToFile(self, fileobj):
257 """Serializes metadata to the given file object."""
258 obj = {
259 "files-md5": self.FilesMd5(),
260 "strings-md5": self.StringsMd5(),
261 "input-files": self._files,
262 "input-strings": self._strings,
263 }
264 json.dump(obj, fileobj, indent=2)
265
266 def _AssertNotQueried(self):
267 assert self._files_md5 is None
268 assert self._strings_md5 is None
269 assert self._file_map is None
270
271 def AddStrings(self, values):
272 self._AssertNotQueried()
273 self._strings.extend(str(v) for v in values)
274
275 def AddFile(self, path, tag):
276 """Adds metadata for a non-zip file.
277
278 Args:
279 path: Path to the file.
280 tag: A short string representative of the file contents.
281 """
282 self._AssertNotQueried()
283 self._files.append({
284 'path': path,
285 'tag': tag,
286 })
287
288 def AddZipFile(self, path, entries):
289 """Adds metadata for a zip file.
290
291 Args:
292 path: Path to the file.
293 entries: List of (subpath, tag) tuples for entries within the zip.
294 """
295 self._AssertNotQueried()
296 tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
297 (e[1] for e in entries)))
298 self._files.append({
299 'path': path,
300 'tag': tag,
301 'entries': [{"path": e[0], "tag": e[1]} for e in entries],
302 })
303
304 def GetStrings(self):
305 """Returns the list of input strings."""
306 return self._strings
307
308 def FilesMd5(self):
309 """Lazily computes and returns the aggregate md5 of input files."""
310 if self._files_md5 is None:
311 # Omit paths from md5 since temporary files have random names.
312 self._files_md5 = _ComputeInlineMd5(
313 self.GetTag(p) for p in sorted(self.IterPaths()))
314 return self._files_md5
315
316 def StringsMd5(self):
317 """Lazily computes and returns the aggregate md5 of input strings."""
318 if self._strings_md5 is None:
319 self._strings_md5 = _ComputeInlineMd5(self._strings)
320 return self._strings_md5
321
322 def _GetEntry(self, path, subpath=None):
323 """Returns the JSON entry for the given path / subpath."""
324 if self._file_map is None:
325 self._file_map = {}
326 for entry in self._files:
327 self._file_map[(entry['path'], None)] = entry
328 for subentry in entry.get('entries', ()):
329 self._file_map[(entry['path'], subentry['path'])] = subentry
330 return self._file_map.get((path, subpath))
331
332 def GetTag(self, path, subpath=None):
333 """Returns the tag for the given path / subpath."""
334 ret = self._GetEntry(path, subpath)
335 return ret and ret['tag']
336
337 def IterPaths(self):
338 """Returns a generator for all top-level paths."""
339 return (e['path'] for e in self._files)
340
341 def IterSubpaths(self, path):
342 """Returns a generator for all subpaths in the given zip.
343
344 If the given path is not a zip file, returns an empty generator.
345 """
346 outer_entry = self._GetEntry(path)
347 subentries = outer_entry.get('entries', [])
348 return (entry['path'] for entry in subentries)
67 349
68 350
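For concreteness, here is how the new metadata round-trips through the JSON stamp, using made-up paths and tag values (the schema itself is documented in the comment at the top of the class):

import StringIO

meta = _Metadata()
meta.AddStrings(['--incremental'])
meta.AddFile('AndroidManifest.xml', 'aabbccdd')  # whole-file md5 as the tag
meta.AddZipFile('classes.jar', [('org/chromium/Foo.class', 12345678)])  # CRC32 tags

buf = StringIO.StringIO()
meta.ToFile(buf)  # Writes the JSON described by the schema comment.
restored = _Metadata.FromFile(StringIO.StringIO(buf.getvalue()))
assert restored.GetTag('classes.jar', 'org/chromium/Foo.class') == 12345678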
69 def _UpdateMd5ForFile(md5, path, block_size=2**16): 351 def _UpdateMd5ForFile(md5, path, block_size=2**16):
70 with open(path, 'rb') as infile: 352 with open(path, 'rb') as infile:
71 while True: 353 while True:
72 data = infile.read(block_size) 354 data = infile.read(block_size)
73 if not data: 355 if not data:
74 break 356 break
75 md5.update(data) 357 md5.update(data)
76 358
77 359
78 def _UpdateMd5ForDirectory(md5, dir_path): 360 def _UpdateMd5ForDirectory(md5, dir_path):
79 for root, _, files in os.walk(dir_path): 361 for root, _, files in os.walk(dir_path):
80 for f in files: 362 for f in files:
81 _UpdateMd5ForFile(md5, os.path.join(root, f)) 363 _UpdateMd5ForFile(md5, os.path.join(root, f))
82 364
83 365
84 def _UpdateMd5ForPath(md5, path): 366 def _Md5ForPath(path):
367 md5 = hashlib.md5()
85 if os.path.isdir(path): 368 if os.path.isdir(path):
86 _UpdateMd5ForDirectory(md5, path) 369 _UpdateMd5ForDirectory(md5, path)
87 else: 370 else:
88 _UpdateMd5ForFile(md5, path) 371 _UpdateMd5ForFile(md5, path)
372 return md5.hexdigest()
89 373
90 374
91 def _TrimPathPrefix(path): 375 def _ComputeInlineMd5(iterable):
92 """Attempts to remove temp dir prefix from the path. 376 """Computes the md5 of the concatenated parameters."""
93 377 md5 = hashlib.md5()
94 Use this only for extended_info (not for the actual md5). 378 for item in iterable:
95 """ 379 md5.update(str(item))
96 return _TEMP_DIR_PATTERN.sub('{TMP}', path) 380 return md5.hexdigest()
97 381
98 382
99 class _Md5Checker(object): 383 def _IsZipFile(path):
100 def __init__(self, record_path=None, input_paths=None, input_strings=None): 384 """Returns whether to treat the given file as a zip file."""
101 if not input_paths: 385 # ijar doesn't set the CRC32 field.
102 input_paths = [] 386 if path.endswith('.interface.jar'):
103 if not input_strings: 387 return False
104 input_strings = [] 388 return path[-4:] in ('.zip', '.apk', '.jar') or path.endswith('.srcjar')
105 389
106 assert record_path.endswith('.stamp'), (
107 'record paths must end in \'.stamp\' so that they are easy to find '
108 'and delete')
109 390
110 self.record_path = record_path 391 def _ExtractZipEntries(path):
111 392 """Returns a list of (path, CRC32) of all files within |path|."""
112 extended_info = [] 393 entries = []
113 outer_md5 = hashlib.md5() 394 with zipfile.ZipFile(path) as zip_file:
114 for i in sorted(input_paths): 395 for zip_info in zip_file.infolist():
115 inner_md5 = hashlib.md5() 396 # Skip directories and empty files.
116 _UpdateMd5ForPath(inner_md5, i) 397 if zip_info.CRC:
117 i = _TrimPathPrefix(i) 398 entries.append((zip_info.filename, zip_info.CRC))
118 extended_info.append(i + '=' + inner_md5.hexdigest()) 399 return entries
119 # Include the digest in the overall diff, but not the path
120 outer_md5.update(inner_md5.hexdigest())
121
122 for s in (str(s) for s in input_strings):
123 outer_md5.update(s)
124 extended_info.append(s)
125
126 self.new_digest = outer_md5.hexdigest()
127 self.new_extended_info = extended_info
128
129 self.old_digest = ''
130 self.old_extended_info = []
131 if os.path.exists(self.record_path):
132 with open(self.record_path, 'r') as old_record:
133 self.old_extended_info = [line.strip() for line in old_record]
134 if self.old_extended_info:
135 self.old_digest = self.old_extended_info.pop(0)
136
137 def Write(self):
138 with open(self.record_path, 'w') as new_record:
139 new_record.write(self.new_digest)
140 new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n')
141
142 def DescribeDifference(self):
143 if self.old_digest == self.new_digest:
144 return "There's no difference."
145 if not self.old_digest:
146 return 'Previous stamp file not found.'
147 if not self.old_extended_info:
148 return 'Previous stamp file lacks extended info.'
149 diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info)
150 return '\n'.join(diff)