Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(742)

Side by Side Diff: scripts/slave/recipe_modules/isolate/resources/compare_build_artifacts.py

Issue 687723008: Optimize compare_build_artifacts.py to have an even more compact output. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/build
Patch Set: Summum Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved. 2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Compare the artifacts from two builds.""" 6 """Compare the artifacts from two builds."""
7 7
8 import difflib 8 import difflib
9 import json 9 import json
10 import optparse 10 import optparse
11 import os 11 import os
12 import struct
12 import sys 13 import sys
14 import time
13 15
14 16
15 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
16 18
17 19
def get_files_to_compare(build_dir):
  """Returns the set of file names in |build_dir| that should be compared.

  Only direct children of |build_dir| are considered. A name is kept when it
  refers to a regular file that is not hidden (no leading dot) and is either:
    - a .isolated file, or
    - an executable file whose extension is one of the known binary
      extensions (including no extension at all).
  """
  binary_extensions = frozenset(
      ('', '.app', '.dll', '.dylib', '.exe', '.nexe', '.so'))

  def is_candidate(name):
    path = os.path.join(build_dir, name)
    # Directories, dangling entries and dot-files are never compared.
    if name.startswith('.') or not os.path.isfile(path):
      return False
    extension = os.path.splitext(name)[1]
    # .isolated files are compared regardless of their permission bits.
    if extension == '.isolated':
      return True
    if extension not in binary_extensions:
      return False
    return os.access(path, os.X_OK)

  selected = set()
  for entry in os.listdir(build_dir):
    if is_candidate(entry):
      selected.add(entry)
  return selected
35 37
36 38
def diff_dict(a, b):
  """Returns a yaml-like textual diff of two dicts.

  It is currently optimized for the .isolated format.

  Args:
    a: first dict.
    b: second dict.

  Returns:
    A string with one '- key:' entry per differing key, or '' when both dicts
    are equal. Nested dicts are diffed recursively and their entries are
    indented by one space per nesting level. A key missing from one side is
    reported with None as its value on that side.
  """
  entries = []
  # Sort the keys so the report is stable from one run to the next; iterating
  # the raw set would emit the entries in hash order.
  for key in sorted(set(a) | set(b)):
    va = a.get(key)
    vb = b.get(key)
    if type(va) is not type(vb):
      # Different types: use repr() so that 1 and '1' are distinguishable.
      entries.append('- %s:\n %r != %r\n' % (key, va, vb))
    elif isinstance(va, dict):
      nested = diff_dict(va, vb)
      if nested:
        entries.append('- %s:\n%s\n' % (
            key, '\n'.join(' ' + l for l in nested.splitlines())))
    elif va != vb:
      entries.append('- %s:\n %s != %s\n' % (key, va, vb))
  return ''.join(entries).rstrip()
58
59
def diff_binary(first_filepath, second_filepath, file_len):
  """Returns a compact binary diff if the diff is small enough.

  Both files are read in lockstep in chunks of CHUNK_SIZE bytes. Reading stops
  at the end of the first file.

  Args:
    first_filepath: path of the left-hand side file.
    second_filepath: path of the right-hand side file.
    file_len: size in bytes of the files; only used to compute the percentage
        of differing bytes.

  Returns:
    None if no byte differs. Otherwise a string whose first line is a summary
    ('N out of M bytes are different (P%)'). When at most MAX_STREAMS chunks
    differ, each differing chunk is appended as a hex + ASCII dump of both
    sides prefixed by the chunk offset, followed by difflib's marker line when
    difflib produced one.
  """
  CHUNK_SIZE = 32
  MAX_STREAMS = 10
  diffs = 0
  streams = []
  offset = 0
  with open(first_filepath, 'rb') as lhs:
    with open(second_filepath, 'rb') as rhs:
      while True:
        lhs_data = lhs.read(CHUNK_SIZE)
        rhs_data = rhs.read(CHUNK_SIZE)
        if not lhs_data:
          break
        if lhs_data != rhs_data:
          diffs += sum(l != r for l, r in zip(lhs_data, rhs_data))
          # zip() stops at the shorter chunk; bytes missing from one side are
          # differences too.
          diffs += abs(len(lhs_data) - len(rhs_data))
          if streams is not None:
            if len(streams) < MAX_STREAMS:
              streams.append((offset, lhs_data, rhs_data))
            else:
              # Too many differing chunks; only the summary will be reported.
              streams = None
        offset += len(lhs_data)
  if not diffs:
    return None

  result = '%d out of %d bytes are different (%.2f%%)' % (
      diffs, file_len, 100.0 * diffs / file_len)
  if streams:
    # bytearray() yields integers on both Python 2 and Python 3, so the dump
    # does not depend on str.encode('hex') or on ord() of a 1-char string.
    def dump(data):
      values = bytearray(data)
      hexa = ''.join('%02x' % c for c in values)
      # Only printable ASCII is shown verbatim; DEL (0x7f) is not printable.
      text = ''.join(chr(c) if 32 <= c < 127 else '.' for c in values)
      return '%s \'%s\'' % (hexa, text)

    for offset, lhs_data, rhs_data in streams:
      lhs_line = dump(lhs_data)
      rhs_line = dump(rhs_data)
      prefix = '0x%-8x: ' % offset
      # Continuation lines are padded to the prefix width so that the marker
      # line points at the right columns.
      indent = ' ' * len(prefix)
      result += '\n%s%s\n%s%s' % (prefix, lhs_line, indent, rhs_line)
      delta = list(difflib.Differ().compare([lhs_line], [rhs_line]))
      # Differ only emits a '? ' guide line when both lines are similar
      # enough; otherwise the last item is the '+ ' line itself, which must
      # not be printed as if it were a guide.
      if delta and delta[-1].startswith('? '):
        result += '\n%s%s' % (indent, delta[-1][2:].rstrip('\n'))
  return result
97
98
def compare_files(first_filepath, second_filepath):
  """Compares two build artifacts and describes how they differ.

  .isolated files are first compared as JSON documents so that the report
  names the differing keys. If that finds nothing, or for any other kind of
  file, the files are compared by size and then byte by byte.

  Args:
    first_filepath: path of the artifact from the first build.
    second_filepath: path of the artifact from the second build.

  Returns:
    None if the files are equal, a string describing the difference otherwise.
  """
  if first_filepath.endswith('.isolated'):
    with open(first_filepath, 'rb') as f:
      lhs = json.load(f)
    with open(second_filepath, 'rb') as f:
      rhs = json.load(f)
    diff = diff_dict(lhs, rhs)
    if diff:
      # Start on a fresh line so the multi-line report is not glued to
      # whatever the caller prints before it.
      return '\n' + diff

  # Stat each file exactly once so that the sizes reported are the sizes that
  # were compared.
  first_len = os.stat(first_filepath).st_size
  second_len = os.stat(second_filepath).st_size
  if first_len != second_len:
    return 'different size: %d != %d' % (first_len, second_len)

  return diff_binary(first_filepath, second_filepath, first_len)
78 119
79 120
80 def compare_build_artifacts(first_dir, second_dir): 121 def compare_build_artifacts(first_dir, second_dir):
81 """Compare the artifacts from two distinct builds.""" 122 """Compare the artifacts from two distinct builds."""
82 if not os.path.isdir(first_dir): 123 if not os.path.isdir(first_dir):
83 print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir 124 print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir
84 return 1 125 return 1
85 if not os.path.isdir(second_dir): 126 if not os.path.isdir(second_dir):
86 print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir 127 print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir
87 return 1 128 return 1
88 129
89 with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f: 130 with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f:
90 blacklist = frozenset(json.load(f)) 131 blacklist = frozenset(json.load(f))
91 132
92 res = 0 133 res = 0
93 first_list = get_files_to_compare(first_dir) - blacklist 134 first_list = get_files_to_compare(first_dir) - blacklist
94 second_list = get_files_to_compare(second_dir) - blacklist 135 second_list = get_files_to_compare(second_dir) - blacklist
95 136
96 diff = first_list.symmetric_difference(second_list) 137 diff = first_list.symmetric_difference(second_list)
97 if diff: 138 if diff:
98 print >> sys.stderr, 'Different list of files in both directories' 139 print >> sys.stderr, 'Different list of files in both directories'
99 print >> sys.stderr, '\n'.join(' ' + i for i in sorted(diff)) 140 print >> sys.stderr, '\n'.join(' ' + i for i in sorted(diff))
100 res += len(diff) 141 res += len(diff)
101 142
143 epoch_hex = struct.pack('<I', int(time.time())).encode('hex')
144 print('Epoch: %s' %
145 ' '.join(epoch_hex[i:i+2] for i in xrange(0, len(epoch_hex), 2)))
102 max_filepath_len = max(len(n) for n in first_list & second_list) 146 max_filepath_len = max(len(n) for n in first_list & second_list)
103 for f in sorted(first_list & second_list): 147 for f in sorted(first_list & second_list):
104 first_file = os.path.join(first_dir, f) 148 first_file = os.path.join(first_dir, f)
105 second_file = os.path.join(second_dir, f) 149 second_file = os.path.join(second_dir, f)
106 result = compare_files(first_file, second_file) 150 result = compare_files(first_file, second_file)
107 if not result: 151 if not result:
108 result = 'equal' 152 result = 'equal'
109 else: 153 else:
110 result = 'DIFFERENT: %s' % result 154 result = 'DIFFERENT: %s' % result
111 res += 1 155 res += 1
(...skipping 17 matching lines...) Expand all
129 parser.error('--first-build-dir is required') 173 parser.error('--first-build-dir is required')
130 if not options.second_build_dir: 174 if not options.second_build_dir:
131 parser.error('--second-build-dir is required') 175 parser.error('--second-build-dir is required')
132 176
133 return compare_build_artifacts(options.first_build_dir, 177 return compare_build_artifacts(options.first_build_dir,
134 options.second_build_dir) 178 options.second_build_dir)
135 179
136 180
137 if __name__ == '__main__': 181 if __name__ == '__main__':
138 sys.exit(main()) 182 sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698