OLD | NEW |
---|---|
1 # Copyright 2013 The Chromium Authors. All rights reserved. | 1 # Copyright 2013 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import difflib | |
5 import hashlib | 6 import hashlib |
6 import os | 7 import os |
8 import re | |
9 | |
10 | |
11 # When set and a difference is detected, a diff of what changed is printed. | |
12 _PRINT_MD5_DIFFS = int(os.environ.get('PRINT_MD5_DIFFS', 0)) | |
jbudorick
2015/08/26 16:48:49
Environment variables tend to come back to bite us
agrieve
2015/08/26 17:59:13
Because a command-line flag would require re-gyppi
jbudorick
2015/08/26 18:02:51
It's dirty on the switch, but not afterwards. Are
jbudorick
2015/09/01 14:59:43
Hrm. I guess I can live with this for now.
| |
13 | |
14 # Used to strip off temp dir prefix. | |
15 _TEMP_DIR_PATTERN = re.compile(r'^/tmp/.*?/') | |
7 | 16 |
8 | 17 |
9 def CallAndRecordIfStale( | 18 def CallAndRecordIfStale( |
10 function, record_path=None, input_paths=None, input_strings=None, | 19 function, record_path=None, input_paths=None, input_strings=None, |
11 force=False): | 20 force=False): |
12 """Calls function if the md5sum of the input paths/strings has changed. | 21 """Calls function if the md5sum of the input paths/strings has changed. |
13 | 22 |
14 The md5sum of the inputs is compared with the one stored in record_path. If | 23 The md5sum of the inputs is compared with the one stored in record_path. If |
15 this has changed (or the record doesn't exist), function will be called and | 24 this has changed (or the record doesn't exist), function will be called and |
16 the new md5sum will be recorded. | 25 the new md5sum will be recorded. |
17 | 26 |
18 If force is True, the function will be called regardless of whether the | 27 If force is True, the function will be called regardless of whether the |
19 md5sum is out of date. | 28 md5sum is out of date. |
20 """ | 29 """ |
21 if not input_paths: | 30 if not input_paths: |
22 input_paths = [] | 31 input_paths = [] |
23 if not input_strings: | 32 if not input_strings: |
24 input_strings = [] | 33 input_strings = [] |
25 md5_checker = _Md5Checker( | 34 md5_checker = _Md5Checker( |
26 record_path=record_path, | 35 record_path=record_path, |
27 input_paths=input_paths, | 36 input_paths=input_paths, |
28 input_strings=input_strings) | 37 input_strings=input_strings) |
29 if force or md5_checker.IsStale(): | 38 |
39 is_stale = md5_checker.old_digest != md5_checker.new_digest | |
40 if force or is_stale: | |
41 if is_stale and _PRINT_MD5_DIFFS: | |
42 print '\033[93mDifference found in %s:\033[0m' % record_path | |
jbudorick
2015/08/26 16:48:49
It looks like some of the other gyp utilities, not
agrieve
2015/08/26 17:59:13
I looked into this, but IMO makes it more ugly tha
jbudorick
2015/08/26 18:02:51
I'm not really interested in doing this in two dif
jbudorick
2015/09/01 14:59:43
This, on the other hand...
agrieve
2015/09/01 17:32:58
Colorama'ed.
| |
43 print md5_checker.DescribeDifference() | |
30 function() | 44 function() |
31 md5_checker.Write() | 45 md5_checker.Write() |
32 | 46 |
33 | 47 |
34 def _UpdateMd5ForFile(md5, path, block_size=2**16): | 48 def _UpdateMd5ForFile(md5, path, block_size=2**16): |
35 with open(path, 'rb') as infile: | 49 with open(path, 'rb') as infile: |
36 while True: | 50 while True: |
37 data = infile.read(block_size) | 51 data = infile.read(block_size) |
38 if not data: | 52 if not data: |
39 break | 53 break |
40 md5.update(data) | 54 md5.update(data) |
41 | 55 |
42 | 56 |
43 def _UpdateMd5ForDirectory(md5, dir_path): | 57 def _UpdateMd5ForDirectory(md5, dir_path): |
44 for root, _, files in os.walk(dir_path): | 58 for root, _, files in os.walk(dir_path): |
45 for f in files: | 59 for f in files: |
46 _UpdateMd5ForFile(md5, os.path.join(root, f)) | 60 _UpdateMd5ForFile(md5, os.path.join(root, f)) |
47 | 61 |
48 | 62 |
49 def _UpdateMd5ForPath(md5, path): | 63 def _UpdateMd5ForPath(md5, path): |
50 if os.path.isdir(path): | 64 if os.path.isdir(path): |
51 _UpdateMd5ForDirectory(md5, path) | 65 _UpdateMd5ForDirectory(md5, path) |
52 else: | 66 else: |
53 _UpdateMd5ForFile(md5, path) | 67 _UpdateMd5ForFile(md5, path) |
54 | 68 |
55 | 69 |
70 def _TrimPathPrefix(path): | |
71 """Attempts to remove temp dir prefix from the path. | |
72 | |
73 Use this only for extended_info (not for the actual md5). | |
74 """ | |
75 return _TEMP_DIR_PATTERN.sub('{TMP}', path) | |
76 | |
77 | |
56 class _Md5Checker(object): | 78 class _Md5Checker(object): |
57 def __init__(self, record_path=None, input_paths=None, input_strings=None): | 79 def __init__(self, record_path=None, input_paths=None, input_strings=None): |
58 if not input_paths: | 80 if not input_paths: |
59 input_paths = [] | 81 input_paths = [] |
60 if not input_strings: | 82 if not input_strings: |
61 input_strings = [] | 83 input_strings = [] |
62 | 84 |
63 assert record_path.endswith('.stamp'), ( | 85 assert record_path.endswith('.stamp'), ( |
64 'record paths must end in \'.stamp\' so that they are easy to find ' | 86 'record paths must end in \'.stamp\' so that they are easy to find ' |
65 'and delete') | 87 'and delete') |
66 | 88 |
67 self.record_path = record_path | 89 self.record_path = record_path |
68 | 90 |
69 md5 = hashlib.md5() | 91 extended_info = [] |
92 outer_md5 = hashlib.md5() | |
70 for i in sorted(input_paths): | 93 for i in sorted(input_paths): |
71 _UpdateMd5ForPath(md5, i) | 94 inner_md5 = hashlib.md5() |
95 _UpdateMd5ForPath(inner_md5, i) | |
96 i = _TrimPathPrefix(i) | |
97 extended_info.append(i + '=' + inner_md5.hexdigest()) | |
98 # Include the digest in the overall diff, but not the path | |
99 outer_md5.update(inner_md5.hexdigest()) | |
100 | |
72 for s in input_strings: | 101 for s in input_strings: |
73 md5.update(s) | 102 outer_md5.update(s) |
74 self.new_digest = md5.hexdigest() | 103 extended_info.append(s) |
104 | |
105 self.new_digest = outer_md5.hexdigest() | |
106 self.new_extended_info = extended_info | |
75 | 107 |
76 self.old_digest = '' | 108 self.old_digest = '' |
109 self.old_extended_info = [] | |
77 if os.path.exists(self.record_path): | 110 if os.path.exists(self.record_path): |
78 with open(self.record_path, 'r') as old_record: | 111 with open(self.record_path, 'r') as old_record: |
79 self.old_digest = old_record.read() | 112 self.old_extended_info = [line.strip() for line in old_record] |
80 | 113 self.old_digest = self.old_extended_info.pop(0) |
81 def IsStale(self): | |
82 return self.old_digest != self.new_digest | |
83 | 114 |
84 def Write(self): | 115 def Write(self): |
85 with open(self.record_path, 'w') as new_record: | 116 with open(self.record_path, 'w') as new_record: |
86 new_record.write(self.new_digest) | 117 new_record.write(self.new_digest) |
118 new_record.write('\n' + '\n'.join(self.new_extended_info) + '\n') | |
jbudorick
2015/08/26 16:48:49
so now we're always writing the extended info?
agrieve
2015/08/26 17:59:13
Yes. I measured if this slowed anything down, and
| |
119 | |
120 def DescribeDifference(self): | |
121 if self.old_digest == self.new_digest: | |
122 return 'There\'s no difference' | |
jbudorick
2015/08/26 16:48:49
nit: double quotes when you've got an apostrophe.
agrieve
2015/08/26 17:59:13
Done.
| |
123 if not self.old_digest: | |
124 return 'Previous stamp file not found.' | |
125 if not self.old_extended_info: | |
126 return 'Previous stamp file lacks extended info.' | |
127 diff = difflib.unified_diff(self.old_extended_info, self.new_extended_info) | |
128 return '\n'.join(diff) | |
OLD | NEW |