Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(241)

Side by Side Diff: build/android/gyp/util/md5_check.py

Issue 1361733002: Make javac invocations incremental when possible (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@apkbuilder
Patch Set: Revert write_build_config.py change (wrong CL) Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2013 The Chromium Authors. All rights reserved. 1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import difflib 5 import difflib
6 import hashlib 6 import hashlib
7 import itertools
8 import json
7 import os 9 import os
8 import re
9 import sys 10 import sys
11 import zipfile
10 12
11 13
12 # When set and a difference is detected, a diff of what changed is printed. 14 # When set and a difference is detected, a diff of what changed is printed.
13 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) 15 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0))
14 16
15 # Used to strip off temp dir prefix.
16 _TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/')
17
18 17
def CallAndRecordIfStale(
    function, record_path=None, input_paths=None, input_strings=None,
    output_paths=None, force=False, pass_changes=False):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
      PRINT_MD5_DIFFS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
  """
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata()
  new_metadata.AddStrings(input_strings)

  for path in input_paths:
    if _IsZipFile(path):
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _Md5ForPath(path))

  old_metadata = None
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except:  # pylint: disable=bare-except
        pass  # Not yet using new file format.

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if _PRINT_MD5_DIFFS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  # Delete the old metadata beforehand, since a failure in function() would
  # otherwise leave the stamp in an indeterminate state.
  if old_metadata:
    os.unlink(record_path)

  args = (changes,) if pass_changes else ()
  function(*args)

  with open(record_path, 'w') as f:
    new_metadata.Write(f)
92
class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    # old_metadata is None when no (valid) stamp file existed.
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    # Returns None when there was no previous run to compare against.
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    return (self.force or
            self.old_metadata is None or
            self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
            self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if (self.force or
        not self.old_metadata or
        self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    # Note: previously took an (unused) |path| argument, which was also
    # incorrectly forwarded to the zero-argument iterators below and made any
    # call raise a TypeError.
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'
215
216 class _Metadata(object):
217 """Data model for tracking change metadata."""
218 # Schema:
219 # {
220 # "files-md5": "VALUE",
221 # "strings-md5": "VALUE",
222 # "input-files": [
223 # {
224 # "path": "path.jar",
225 # "tag": "{MD5 of entries}",
226 # "entries": [
227 # { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
228 # ]
229 # }, {
230 # "path": "path.txt",
231 # "tag": "{MD5}",
jbudorick 2015/09/23 00:26:20 nit: missing closing }
agrieve 2015/09/23 02:07:56 Done.
232 # ],
233 # "input-strings": ["a", "b", ...],
234 # }
235 def __init__(self):
236 self._files_md5 = None
237 self._strings_md5 = None
238 self._files = []
239 self._strings = []
240 # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
241 self._file_map = None
242
243 @classmethod
244 def FromFile(cls, fileobj):
jbudorick 2015/09/23 00:26:20 not Read/Write or FromFile/ToFile? :(
agrieve 2015/09/23 02:07:56 Done.
245 """Returns a _Metadata initialized from a file object."""
246 ret = cls()
247 obj = json.load(fileobj)
248 ret._files_md5 = obj['files-md5']
249 ret._strings_md5 = obj['strings-md5']
250 ret._files = obj['input-files']
251 ret._strings = obj['input-strings']
252 return ret
253
254 def Write(self, fileobj):
255 """Serializes metadata to the given file object."""
256 obj = {
257 "files-md5": self.FilesMd5(),
258 "strings-md5": self.StringsMd5(),
259 "input-files": self._files,
260 "input-strings": self._strings,
261 }
262 json.dump(obj, fileobj, indent=2)
263
264 def _AssertNotQueried(self):
265 assert self._files_md5 is None
266 assert self._strings_md5 is None
267 assert self._file_map is None
268
269 def AddStrings(self, values):
270 self._AssertNotQueried()
271 self._strings.extend(str(v) for v in values)
272
273 def AddFile(self, path, tag):
274 """Adds metadata for a non-zip file.
275
276 Args:
277 path: Path to the file.
278 tag: A short string representative of the file contents.
279 """
280 self._AssertNotQueried()
281 self._files.append({
282 'path': path,
283 'tag': tag,
284 })
285
286 def AddZipFile(self, path, entries):
287 """Adds metadata for a zip file.
288
289 Args:
290 path: Path to the file.
291 entries: List of (subpath, tag) tuples for entries within the zip.
292 """
293 self._AssertNotQueried()
294 tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
295 (e[1] for e in entries)))
296 self._files.append({
297 'path': path,
298 'tag': tag,
299 'entries': [{"path": e[0], "tag": e[1]} for e in entries],
300 })
301
302 def GetStrings(self):
303 """Returns the list of input strings."""
304 return self._strings
305
306 def FilesMd5(self):
307 """Lazily computes and returns the aggregate md5 of input files."""
308 if self._files_md5 is None:
309 # Omit paths from md5 since temporary files have random names.
310 self._files_md5 = _ComputeInlineMd5(
311 self.GetTag(p) for p in sorted(self.IterPaths()))
312 return self._files_md5
313
314 def StringsMd5(self):
315 """Lazily computes and returns the aggregate md5 of input strings."""
316 if self._strings_md5 is None:
317 self._strings_md5 = _ComputeInlineMd5(self._strings)
318 return self._strings_md5
319
320 def _GetEntry(self, path, subpath=None):
321 """Returns the JSON entry for the given path / subpath."""
322 if self._file_map is None:
323 self._file_map = {}
324 for entry in self._files:
325 self._file_map[(entry['path'], None)] = entry
326 for subentry in entry.get('entries', ()):
327 self._file_map[(entry['path'], subentry['path'])] = subentry
328 return self._file_map.get((path, subpath))
329
330 def GetTag(self, path, subpath=None):
331 """Returns the tag for the given path / subpath."""
332 ret = self._GetEntry(path, subpath)
333 return ret and ret['tag']
334
335 def IterPaths(self):
336 """Returns a generator for all top-level paths."""
337 return (e['path'] for e in self._files)
338
339 def IterSubpaths(self, path):
340 """Returns a generator for all subpaths in the given zip.
341
342 If the given path is not a zip file, returns an empty generator.
343 """
344 outer_entry = self._GetEntry(path)
345 subentries = outer_entry.get('entries', [])
346 return (entry['path'] for entry in subentries)
66 347
67 348
68 def _UpdateMd5ForFile(md5, path, block_size=2**16): 349 def _UpdateMd5ForFile(md5, path, block_size=2**16):
69 with open(path, 'rb') as infile: 350 with open(path, 'rb') as infile:
70 while True: 351 while True:
71 data = infile.read(block_size) 352 data = infile.read(block_size)
72 if not data: 353 if not data:
73 break 354 break
74 md5.update(data) 355 md5.update(data)
75 356
76 357
def _UpdateMd5ForDirectory(md5, dir_path):
  """Feeds every file under |dir_path| (recursively) into |md5|.

  Visits directories and files in sorted order so that the resulting digest is
  deterministic; os.walk() otherwise yields entries in filesystem-dependent
  order, which would make identical trees hash differently across machines.
  """
  for root, dirs, files in os.walk(dir_path):
    dirs.sort()  # Sorting in-place makes os.walk() recurse deterministically.
    for f in sorted(files):
      _UpdateMd5ForFile(md5, os.path.join(root, f))
82 363
def _Md5ForPath(path):
  """Returns the hex md5 digest of the file or directory at |path|."""
  hasher = hashlib.md5()
  # Directories hash all contained files; plain files hash their contents.
  updater = _UpdateMd5ForDirectory if os.path.isdir(path) else _UpdateMd5ForFile
  updater(hasher, path)
  return hasher.hexdigest()
88 371
89 372
90 def _TrimPathPrefix(path): 373 def _ComputeInlineMd5(iterable):
91 """Attempts to remove temp dir prefix from the path. 374 """Computes the md5 of the concatenated parameters."""
92 375 md5 = hashlib.md5()
93 Use this only for extended_info (not for the actual md5). 376 for item in iterable:
94 """ 377 md5.update(str(item))
95 return _TEMP_DIR_PATTERN.sub('{TMP}', path) 378 return md5.hexdigest()
96 379
97 380
98 class _Md5Checker(object): 381 def _IsZipFile(path):
99 def __init__(self, record_path=None, input_paths=None, input_strings=None): 382 """Returns whether to treat the given file as a zip file."""
100 if not input_paths: 383 # ijar doesn't set the CRC32 field.
101 input_paths = [] 384 if path.endswith('.interface.jar'):
102 if not input_strings: 385 return False
103 input_strings = [] 386 return path.endswith('.zip') or path.endswith('.apk') or path.endswith('.jar')
104 387
105 assert record_path.endswith('.stamp'), (
106 'record paths must end in \'.stamp\' so that they are easy to find '
107 'and delete')
108 388
109 self.record_path = record_path 389 def _ExtractZipEntries(path):
110 390 """Returns a list of (path, CRC32) of all files within |path|."""
111 extended_info = [] 391 entries = []
112 outer_md5 = hashlib.md5() 392 with zipfile.ZipFile(path) as zip_file:
113 for i in sorted(input_paths): 393 for zip_info in zip_file.infolist():
114 inner_md5 = hashlib.md5() 394 # Skip directories and empty files.
115 _UpdateMd5ForPath(inner_md5, i) 395 if zip_info.CRC:
116 i = _TrimPathPrefix(i) 396 entries.append((zip_info.filename, zip_info.CRC))
117 extended_info.append(i + '=' + inner_md5.hexdigest()) 397 return entries
118 # Include the digest in the overall diff, but not the path
119 outer_md5.update(inner_md5.hexdigest())
120
121 for s in map(str, input_strings):
122 outer_md5.update(s)
123 extended_info.append(s)
124
125 self.new_digest = outer_md5.hexdigest()
126 self.new_extended_info = extended_info
127
128 self.old_digest = ''
129 self.old_extended_info = []
130 if os.path.exists(self.record_path):
131 with open(self.record_path, 'r') as old_record:
132 self.old_extended_info = [line.strip() for line in old_record]
133 if self.old_extended_info:
134 self.old_digest = self.old_extended_info.pop(0)
135
136 def Write(self):
137 with open(self.record_path, 'w') as new_record:
138 new_record.write(self.new_digest)
139 new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n')
140
141 def DescribeDifference(self):
142 if self.old_digest == self.new_digest:
143 return "There's no difference."
144 if not self.old_digest:
145 return 'Previous stamp file not found.'
146 if not self.old_extended_info:
147 return 'Previous stamp file lacks extended info.'
148 diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info)
149 return '\n'.join(diff)
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698