OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Compare the artifacts from two builds.""" | 6 """Compare the artifacts from two builds.""" |
7 | 7 |
8 import difflib | 8 import difflib |
9 import json | 9 import json |
10 import optparse | 10 import optparse |
11 import os | 11 import os |
| 12 import struct |
12 import sys | 13 import sys |
| 14 import time |
13 | 15 |
14 | 16 |
# Absolute path of the directory holding this script; the
# deterministic_build_blacklist.json file is opened relative to it.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
16 | 18 |
17 | 19 |
def get_files_to_compare(build_dir):
  """Return the set of entry names in |build_dir| that should be compared.

  An entry is selected when it is a regular, non-hidden file that is either a
  '.isolated' file or an executable file whose extension is one of the known
  binary extensions (including no extension at all).
  """
  binary_exts = frozenset(
      ('', '.app', '.dll', '.dylib', '.exe', '.nexe', '.so'))

  def is_candidate(path):
    # Anything that is not a regular file, and any dot-file, is ignored.
    if not os.path.isfile(path) or os.path.basename(path).startswith('.'):
      return False
    suffix = os.path.splitext(path)[1]
    # .isolated files are selected regardless of their executable bit.
    if suffix == '.isolated':
      return True
    return suffix in binary_exts and os.access(path, os.X_OK)

  selected = set()
  for name in os.listdir(build_dir):
    if is_candidate(os.path.join(build_dir, name)):
      selected.add(name)
  return selected
35 | 37 |
36 | 38 |
def diff_dict(a, b):
  """Returns a yaml-like textual diff of two dicts.

  It is currently optimized for the .isolated format, i.e. dicts produced by
  json.load(): keys must be mutually orderable (always true for JSON objects,
  whose keys are all strings).

  Args:
    a: first dict.
    b: second dict.

  Returns:
    An empty string when no difference is found, otherwise one '- key:' entry
    per differing key. Nested dicts are diffed recursively and their entries
    are indented under the parent key. A key missing from one side is read as
    None on that side.
  """
  entries = []
  # Walk the keys in sorted order so that the report is identical from one run
  # to the next; raw set iteration order depends on hashing.
  for key in sorted(set(a) | set(b)):
    va = a.get(key)
    vb = b.get(key)
    if va.__class__ != vb.__class__:
      # Different types (including "present on one side only"): show reprs so
      # that e.g. None and 'None' can be told apart.
      entries.append('- %s:\n %r != %r\n' % (key, va, vb))
    elif isinstance(va, dict):
      nested = diff_dict(va, vb)
      if nested:
        entries.append('- %s:\n%s\n' % (
            key, '\n'.join(' ' + line for line in nested.splitlines())))
    elif va != vb:
      entries.append('- %s:\n %s != %s\n' % (key, va, vb))
  return ''.join(entries).rstrip()
| 58 |
| 59 |
def diff_binary(first_filepath, second_filepath, file_len):
  """Returns a compact binary diff if the diff is small enough.

  The two files are expected to have the same size; bytes are compared
  position by position.

  Args:
    first_filepath: path to the first file.
    second_filepath: path to the second file.
    file_len: size in bytes of the files, used to compute the percentage.

  Returns:
    None if no differing byte was found. Otherwise a string starting with a
    summary line ('N out of M bytes are different'). When at most MAX_STREAMS
    chunks of CHUNK_SIZE bytes differ, the summary is followed, for each such
    chunk, by its offset, both versions rendered as hex plus printable ASCII,
    and (when difflib produces one) a guide line marking the changed columns.
  """
  CHUNK_SIZE = 32
  # Files are read in large blocks and only blocks that differ are examined
  # CHUNK_SIZE bytes at a time. Must stay a multiple of CHUNK_SIZE so chunk
  # offsets are the same as when reading chunk by chunk.
  BLOCK_SIZE = CHUNK_SIZE * 32 * 1024
  MAX_STREAMS = 10
  diffs = 0
  # List of (offset, lhs chunk, rhs chunk); set to None once more than
  # MAX_STREAMS chunks differ, which disables the detailed output.
  streams = []
  offset = 0
  with open(first_filepath, 'rb') as lhs:
    with open(second_filepath, 'rb') as rhs:
      while True:
        lhs_block = lhs.read(BLOCK_SIZE)
        rhs_block = rhs.read(BLOCK_SIZE)
        if not lhs_block:
          break
        if lhs_block != rhs_block:
          for start in range(0, len(lhs_block), CHUNK_SIZE):
            lhs_data = lhs_block[start:start + CHUNK_SIZE]
            rhs_data = rhs_block[start:start + CHUNK_SIZE]
            if lhs_data == rhs_data:
              continue
            # bytearray yields ints on both Python 2 and Python 3.
            diffs += sum(
                l != r
                for l, r in zip(bytearray(lhs_data), bytearray(rhs_data)))
            if streams is not None:
              if len(streams) < MAX_STREAMS:
                streams.append((offset + start, lhs_data, rhs_data))
              else:
                streams = None
        offset += len(lhs_block)
  if not diffs:
    return None

  lines = ['%d out of %d bytes are different (%.2f%%)' % (
      diffs, file_len, 100.0 * diffs / file_len)]
  if streams:
    def render(data):
      """Formats a chunk as hex digits followed by its quoted ASCII form."""
      values = bytearray(data)
      hexa = ''.join('%02x' % v for v in values)
      # 0x20..0x7e are the printable ASCII characters; 0x7f (DEL) is not.
      text = ''.join(chr(v) if 31 < v < 127 else '.' for v in values)
      return '%s \'%s\'' % (hexa, text)

    for stream_offset, lhs_data, rhs_data in streams:
      lhs_line = render(lhs_data)
      rhs_line = render(rhs_data)
      # Differ yields '- lhs', optional '? guide', '+ rhs', optional
      # '? guide'. The guide lines are only emitted when both lines are
      # similar enough, so only use the last item if it really is a guide.
      last = list(difflib.Differ().compare([lhs_line], [rhs_line]))[-1]
      guide = last[2:].rstrip('\n') if last.startswith('? ') else ''
      prefix = '0x%-8x: ' % stream_offset
      # Indent continuation lines by the prefix width so that both versions
      # and the guide markers line up column for column.
      pad = ' ' * len(prefix)
      lines.append(prefix + lhs_line)
      lines.append(pad + rhs_line)
      if guide:
        lines.append(pad + guide)
  return '\n'.join(lines)
| 97 |
| 98 |
def compare_files(first_filepath, second_filepath):
  """Compares two build artifacts and describes how they differ.

  '.isolated' files are first compared as JSON documents with diff_dict(); if
  that finds nothing (or the files cannot be parsed as JSON objects), they are
  compared like any other file: by size, then byte by byte with diff_binary().

  Args:
    first_filepath: path to the artifact from the first build.
    second_filepath: path to the same artifact from the second build.

  Returns:
    None if the files are equal, a string describing the difference otherwise.
  """
  if first_filepath.endswith('.isolated'):
    try:
      with open(first_filepath, 'rb') as f:
        lhs = json.load(f)
      with open(second_filepath, 'rb') as f:
        rhs = json.load(f)
    except ValueError:
      # Not valid JSON. Do not abort the whole comparison for one bad file;
      # the byte-level comparison below still reports whether they differ.
      lhs = rhs = None
    # diff_dict() only understands dicts; anything else goes to the byte-level
    # comparison.
    if isinstance(lhs, dict) and isinstance(rhs, dict):
      diff = diff_dict(lhs, rhs)
      if diff:
        return '\n' + diff

  first_len = os.stat(first_filepath).st_size
  second_len = os.stat(second_filepath).st_size
  if first_len != second_len:
    return 'different size: %d != %d' % (first_len, second_len)

  return diff_binary(first_filepath, second_filepath, first_len)
78 | 119 |
79 | 120 |
80 def compare_build_artifacts(first_dir, second_dir): | 121 def compare_build_artifacts(first_dir, second_dir): |
81 """Compare the artifacts from two distinct builds.""" | 122 """Compare the artifacts from two distinct builds.""" |
82 if not os.path.isdir(first_dir): | 123 if not os.path.isdir(first_dir): |
83 print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir | 124 print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir |
84 return 1 | 125 return 1 |
85 if not os.path.isdir(second_dir): | 126 if not os.path.isdir(second_dir): |
86 print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir | 127 print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir |
87 return 1 | 128 return 1 |
88 | 129 |
89 with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f: | 130 with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f: |
90 blacklist = frozenset(json.load(f)) | 131 blacklist = frozenset(json.load(f)) |
91 | 132 |
92 res = 0 | 133 res = 0 |
93 first_list = get_files_to_compare(first_dir) - blacklist | 134 first_list = get_files_to_compare(first_dir) - blacklist |
94 second_list = get_files_to_compare(second_dir) - blacklist | 135 second_list = get_files_to_compare(second_dir) - blacklist |
95 | 136 |
96 diff = first_list.symmetric_difference(second_list) | 137 diff = first_list.symmetric_difference(second_list) |
97 if diff: | 138 if diff: |
98 print >> sys.stderr, 'Different list of files in both directories' | 139 print >> sys.stderr, 'Different list of files in both directories' |
99 print >> sys.stderr, '\n'.join(' ' + i for i in sorted(diff)) | 140 print >> sys.stderr, '\n'.join(' ' + i for i in sorted(diff)) |
100 res += len(diff) | 141 res += len(diff) |
101 | 142 |
| 143 epoch_hex = struct.pack('<I', int(time.time())).encode('hex') |
| 144 print('Epoch: %s' % |
| 145 ' '.join(epoch_hex[i:i+2] for i in xrange(0, len(epoch_hex), 2))) |
102 max_filepath_len = max(len(n) for n in first_list & second_list) | 146 max_filepath_len = max(len(n) for n in first_list & second_list) |
103 for f in sorted(first_list & second_list): | 147 for f in sorted(first_list & second_list): |
104 first_file = os.path.join(first_dir, f) | 148 first_file = os.path.join(first_dir, f) |
105 second_file = os.path.join(second_dir, f) | 149 second_file = os.path.join(second_dir, f) |
106 result = compare_files(first_file, second_file) | 150 result = compare_files(first_file, second_file) |
107 if not result: | 151 if not result: |
108 result = 'equal' | 152 result = 'equal' |
109 else: | 153 else: |
110 result = 'DIFFERENT: %s' % result | 154 result = 'DIFFERENT: %s' % result |
111 res += 1 | 155 res += 1 |
(...skipping 17 matching lines...) Expand all Loading... |
129 parser.error('--first-build-dir is required') | 173 parser.error('--first-build-dir is required') |
130 if not options.second_build_dir: | 174 if not options.second_build_dir: |
131 parser.error('--second-build-dir is required') | 175 parser.error('--second-build-dir is required') |
132 | 176 |
133 return compare_build_artifacts(options.first_build_dir, | 177 return compare_build_artifacts(options.first_build_dir, |
134 options.second_build_dir) | 178 options.second_build_dir) |
135 | 179 |
136 | 180 |
if __name__ == '__main__':
  # Run only when executed as a script; the value returned by main() is used
  # as the process exit status.
  exit_code = main()
  sys.exit(exit_code)
OLD | NEW |