| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright 2014 The Chromium Authors. All rights reserved. | 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Compare the artifacts from two builds.""" | 6 """Compare the artifacts from two builds.""" |
| 7 | 7 |
| 8 import difflib | 8 import difflib |
| 9 import json | 9 import json |
| 10 import optparse | 10 import optparse |
| 11 import os | 11 import os |
| 12 import struct |
| 12 import sys | 13 import sys |
| 14 import time |
| 13 | 15 |
| 14 | 16 |
| 15 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 17 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
| 16 | 18 |
| 17 | 19 |
def get_files_to_compare(build_dir):
  """Returns the set of file names in |build_dir| that should be compared.

  Only direct children of |build_dir| are considered. A name is kept when it
  refers to a file, does not start with a dot, and is either a '.isolated'
  file or has one of the known binary extensions (including no extension at
  all) while being executable according to os.access().
  """
  binary_extensions = frozenset(
      ('', '.app', '.dll', '.dylib', '.exe', '.nexe', '.so'))

  def is_candidate(name):
    path = os.path.join(build_dir, name)
    if not os.path.isfile(path) or os.path.basename(path).startswith('.'):
      return False
    extension = os.path.splitext(path)[1]
    # .isolated files are always compared, whatever their permission bits.
    if extension == '.isolated':
      return True
    return extension in binary_extensions and os.access(path, os.X_OK)

  selected = set()
  for name in os.listdir(build_dir):
    if is_candidate(name):
      selected.add(name)
  return selected
| 35 | 37 |
| 36 | 38 |
def diff_dict(a, b):
  """Returns a yaml-like textual diff of two dicts.

  It is currently optimized for the .isolated format.

  Keys are visited in sorted order so that the report is identical from one
  run to the next; they must therefore be mutually orderable, which holds for
  dicts loaded from JSON since their keys are all strings.

  Args:
    a: first dict.
    b: second dict.

  Returns:
    An empty string when no difference is found, otherwise one '- key:' entry
    per differing key. Nested dicts are diffed recursively and their entries
    are indented by one space. A key absent from one side is read as None via
    dict.get(), so it is not distinguished from an explicit None value.
  """
  pieces = []
  for key in sorted(set(a) | set(b)):
    va = a.get(key)
    vb = b.get(key)
    if va.__class__ != vb.__class__:
      # Values of different types are shown with repr() so that e.g. 1 and '1'
      # can be told apart in the report.
      pieces.append('- %s:\n %r != %r\n' % (key, va, vb))
    elif isinstance(va, dict):
      nested = diff_dict(va, vb)
      if nested:
        pieces.append('- %s:\n%s\n' % (
            key, '\n'.join(' ' + l for l in nested.splitlines())))
    elif va != vb:
      pieces.append('- %s:\n %s != %s\n' % (key, va, vb))
  return ''.join(pieces).rstrip()
| 58 |
| 59 |
def diff_binary(first_filepath, second_filepath, file_len):
  """Returns a compact binary diff if the diff is small enough.

  Both files are read in lockstep, CHUNK_SIZE bytes at a time, until the first
  file is exhausted. Differing bytes are counted pairwise, and up to
  MAX_STREAMS differing chunks are kept so they can be shown as hex plus
  printable ASCII. When more chunks than that differ, only the summary line is
  returned.

  Args:
    first_filepath: path to the first file.
    second_filepath: path to the second file.
    file_len: size in bytes used as the denominator of the reported
        percentage. Bytes are only compared pairwise, so the caller is
        expected to have checked that both files have this size.

  Returns:
    None if no differing byte was found. Otherwise a string starting with
    'X out of Y bytes are different (Z%)', optionally followed, for each kept
    chunk, by its offset, both sides of the chunk and, when difflib produces
    one, a '?' hint line marking the characters that changed.
  """
  CHUNK_SIZE = 32
  MAX_STREAMS = 10
  diffs = 0
  streams = []
  offset = 0
  with open(first_filepath, 'rb') as lhs:
    with open(second_filepath, 'rb') as rhs:
      while True:
        # bytearray yields integers when iterated, whatever the interpreter
        # version, which keeps the formatting code below simple.
        lhs_data = bytearray(lhs.read(CHUNK_SIZE))
        rhs_data = bytearray(rhs.read(CHUNK_SIZE))
        if not lhs_data:
          break
        if lhs_data != rhs_data:
          diffs += sum(l != r for l, r in zip(lhs_data, rhs_data))
          if streams is not None:
            if len(streams) < MAX_STREAMS:
              streams.append((offset, lhs_data, rhs_data))
            else:
              # Too many differing chunks; give up on the detailed output.
              streams = None
        offset += len(lhs_data)
  if not diffs:
    return None
  result = '%d out of %d bytes are different (%.2f%%)' % (
      diffs, file_len, 100.0 * diffs / file_len)
  if streams:
    to_hex = lambda data: ''.join('%02x' % c for c in data)
    # Printable ASCII is 0x20-0x7e; everything else, DEL included, becomes '.'.
    to_text = lambda data: ''.join(
        chr(c) if 31 < c < 127 else '.' for c in data)
    for offset, lhs_data, rhs_data in streams:
      lhs_line = '%s \'%s\'' % (to_hex(lhs_data), to_text(lhs_data))
      rhs_line = '%s \'%s\'' % (to_hex(rhs_data), to_text(rhs_data))
      result += '\n0x%-8x: %s\n %s' % (offset, lhs_line, rhs_line)
      # Differ only emits a trailing '? ' hint line when the two lines are
      # similar enough; otherwise the last item is the '+ ' line itself and
      # must not be printed as if it were a hint.
      last = list(difflib.Differ().compare([lhs_line], [rhs_line]))[-1]
      if last.startswith('? '):
        result += '\n %s' % last[2:-1]
  return result
| 97 |
| 98 |
def compare_files(first_filepath, second_filepath):
  """Compares two build artifacts and describes how they differ.

  '.isolated' files are first compared as parsed JSON through diff_dict(). If
  that finds nothing, or for any other file, the sizes are compared and then
  the contents are compared through diff_binary().

  Returns None if the files are equal, a string otherwise.
  """
  if first_filepath.endswith('.isolated'):
    parsed = []
    for path in (first_filepath, second_filepath):
      with open(path, 'rb') as f:
        parsed.append(json.load(f))
    json_diff = diff_dict(parsed[0], parsed[1])
    if json_diff:
      return '\n' + json_diff

  first_size = os.stat(first_filepath).st_size
  second_size = os.stat(second_filepath).st_size
  if first_size != second_size:
    return 'different size: %d != %d' % (first_size, second_size)

  return diff_binary(first_filepath, second_filepath, first_size)
| 78 | 119 |
| 79 | 120 |
| 80 def compare_build_artifacts(first_dir, second_dir): | 121 def compare_build_artifacts(first_dir, second_dir): |
| 81 """Compare the artifacts from two distinct builds.""" | 122 """Compare the artifacts from two distinct builds.""" |
| 82 if not os.path.isdir(first_dir): | 123 if not os.path.isdir(first_dir): |
| 83 print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir | 124 print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir |
| 84 return 1 | 125 return 1 |
| 85 if not os.path.isdir(second_dir): | 126 if not os.path.isdir(second_dir): |
| 86 print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir | 127 print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir |
| 87 return 1 | 128 return 1 |
| 88 | 129 |
| 89 with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f: | 130 with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f: |
| 90 blacklist = frozenset(json.load(f)) | 131 blacklist = frozenset(json.load(f)) |
| 91 | 132 |
| 92 res = 0 | 133 res = 0 |
| 93 first_list = get_files_to_compare(first_dir) - blacklist | 134 first_list = get_files_to_compare(first_dir) - blacklist |
| 94 second_list = get_files_to_compare(second_dir) - blacklist | 135 second_list = get_files_to_compare(second_dir) - blacklist |
| 95 | 136 |
| 96 diff = first_list.symmetric_difference(second_list) | 137 diff = first_list.symmetric_difference(second_list) |
| 97 if diff: | 138 if diff: |
| 98 print >> sys.stderr, 'Different list of files in both directories' | 139 print >> sys.stderr, 'Different list of files in both directories' |
| 99 print >> sys.stderr, '\n'.join(' ' + i for i in sorted(diff)) | 140 print >> sys.stderr, '\n'.join(' ' + i for i in sorted(diff)) |
| 100 res += len(diff) | 141 res += len(diff) |
| 101 | 142 |
| 143 epoch_hex = struct.pack('<I', int(time.time())).encode('hex') |
| 144 print('Epoch: %s' % |
| 145 ' '.join(epoch_hex[i:i+2] for i in xrange(0, len(epoch_hex), 2))) |
| 102 max_filepath_len = max(len(n) for n in first_list & second_list) | 146 max_filepath_len = max(len(n) for n in first_list & second_list) |
| 103 for f in sorted(first_list & second_list): | 147 for f in sorted(first_list & second_list): |
| 104 first_file = os.path.join(first_dir, f) | 148 first_file = os.path.join(first_dir, f) |
| 105 second_file = os.path.join(second_dir, f) | 149 second_file = os.path.join(second_dir, f) |
| 106 result = compare_files(first_file, second_file) | 150 result = compare_files(first_file, second_file) |
| 107 if not result: | 151 if not result: |
| 108 result = 'equal' | 152 result = 'equal' |
| 109 else: | 153 else: |
| 110 result = 'DIFFERENT: %s' % result | 154 result = 'DIFFERENT: %s' % result |
| 111 res += 1 | 155 res += 1 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 129 parser.error('--first-build-dir is required') | 173 parser.error('--first-build-dir is required') |
| 130 if not options.second_build_dir: | 174 if not options.second_build_dir: |
| 131 parser.error('--second-build-dir is required') | 175 parser.error('--second-build-dir is required') |
| 132 | 176 |
| 133 return compare_build_artifacts(options.first_build_dir, | 177 return compare_build_artifacts(options.first_build_dir, |
| 134 options.second_build_dir) | 178 options.second_build_dir) |
| 135 | 179 |
| 136 | 180 |
| 137 if __name__ == '__main__': | 181 if __name__ == '__main__': |
| 138 sys.exit(main()) | 182 sys.exit(main()) |
| OLD | NEW |