Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(112)

Side by Side Diff: third_party/apk-patch-size-estimator/apk_patch_size_estimator.py

Issue 2718423002: Add apk-patch-size-estimator to //third_party. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python
2 #
3 # Copyright 2016 Google Inc. All Rights Reserved.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 """Estimates the size of Google Play patches and the new gzipped APK.
18
19 From two APKs it estimates the size of new patches as well as
20 the size of a gzipped version of the APK, which would be used in
21 cases where patches are unexpectedly large, unavailable, or unsuitable.
22 Google Play uses multiple techniques to generate patches and generally picks
23 the best match for the device. The best match is usually, but not always, the
24 smallest patch file produced. The numbers that this script produces are
25 ESTIMATES that can be used to characterize the impact of arbitrary changes to
26 APKs. There is NO GUARANTEE that this tool produces the same patches or patch
27 sizes that Google Play generates, stores or transmits, and the actual
28 implementation within Google Play may change at any time, without notice.
29
30 """
31
32 import sys
33 import argparse
34 import locale
35 import math
36 import os
37 import subprocess
38
# Cached locations of the external tools this script shells out to. Each one
# stays None until find_bins_or_die() resolves it.
bsdiff_path = gzip_path = head_path = None
tail_path = bunzip2_path = java_path = None
45
46
def find_bins_or_die():
    """Resolves every external binary this script depends on.

    Looks up bsdiff, gzip, head, tail, bunzip2 and java, in that order, and
    stores each result in the matching module-level ``<name>_path`` variable.
    A variable that already holds a value is left untouched.

    Raises:
      Exception: propagated from find_binary when a binary is not on PATH.
    """
    module_vars = globals()
    for tool in ('bsdiff', 'gzip', 'head', 'tail', 'bunzip2', 'java'):
        slot = tool + '_path'
        if not module_vars[slot]:
            module_vars[slot] = find_binary(tool)
72
73
def find_binary(binary_name):
    """Finds the path of a binary by asking `which`.

    Args:
      binary_name: name of the executable to look up.

    Returns:
      the path printed by `which`, as text with surrounding whitespace
      stripped.

    Raises:
      Exception: if the binary is not on PATH, or if `which` itself cannot be
        executed.
    """
    try:
        # universal_newlines makes check_output return text instead of bytes
        # on Python 3, so callers always get a regular string.
        return subprocess.check_output(
            ['which', binary_name], universal_newlines=True).strip()
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the case where `which` is not installed at all.
        raise Exception(
            'No "' + binary_name + '" on PATH, please install or fix PATH.')
82
83
def human_file_size(size):
    """Converts a byte size number into a human readable value.

    The sign of `size` is discarded. The value is scaled by powers of 1024
    and shown with up to three significant digits followed by the unit,
    e.g. 1536 -> '1.5KB'.

    Args:
      size: a number of bytes (int or float); negative values are treated as
        their absolute value.

    Returns:
      a string such as '0B', '1023B', '1.5KB' or '118MB'.
    """
    size = abs(size)
    if size == 0:
        return '0B'
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
    # Scale by repeated division rather than floor(log2(size) / 10): the
    # floating-point logarithm can land just below an exact power of 1024,
    # and it yields a negative index for fractional sizes below one byte.
    value = float(size)
    index = 0
    # Stop at the largest known unit so huge values cannot run off the list.
    while value >= 1024 and index < len(units) - 1:
        value /= 1024.0
        index += 1
    # '%.3g' switches to exponent notation ('1e+03') once the value rounds to
    # four integer digits, so print those as plain integers instead.
    if value >= 999.5:
        number = '%.0f' % value
    else:
        number = '%.3g' % value
    return number + units[index]
93
94
def _remove_if_present(path):
    """Deletes the file at path if it exists; does nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def _run_to_file(command, output_path, step_name):
    """Runs command with its standard output redirected into output_path.

    Raises:
      Exception: if the command exits with a non-zero status.
    """
    # Binary mode: the tools emit raw patch bytes, not text.
    with open(output_path, 'wb') as output:
        ret_code = subprocess.call(command, shell=False, stdout=output)
    if ret_code != 0:
        raise Exception(
            'Problem at the %s step, returned code: %s' % (step_name, ret_code))


def calculate_bsdiff(old_file, new_file, save_patch_path, temp_path):
    """Estimates the size the Bsdiff patch gzipped.

    Args:
      old_file: the old APK file
      new_file: the new APK file
      save_patch_path: the path including filename to save the generated
        patch. When set, the gzipped patch is kept at
        save_patch_path + '-bsdiff-patch.gz'; when falsy, nothing is kept.
      temp_path: path of the scratch file to use for the process; sibling
        files named temp_path + '.raw_bsdiff*' are created as well.

    Returns:
      the size in bytes of the gzipped bsdiff patch.

    Raises:
      Exception: if there is a problem calling the binaries needed in the
        process
    """

    # Oddities:
    # Bsdiff forces bzip2 compression, which starts after byte 32. Bzip2 isn't
    # necessarily the best choice in all cases, and isn't necessarily what
    # Google Play uses, so it has to be uncompressed and rewritten with gzip.

    # Checks that the OS binaries needed are available
    find_bins_or_die()

    # All intermediate paths are computed up front so cleanup can always run.
    raw_bsdiff_path = temp_path + '.raw_bsdiff'
    bzipped_bsdiff_path = raw_bsdiff_path + '.bz2'
    gzipped_bsdiff_path = raw_bsdiff_path + '.gz'
    bsdiff_header_path = temp_path + '.raw_bsdiff_header'
    if save_patch_path:
        rebuilt_bsdiff_path = save_patch_path + '-bsdiff-patch'
    else:
        rebuilt_bsdiff_path = raw_bsdiff_path + '.rebuilt'
    gzipped_rebuilt_bsdiff_path = rebuilt_bsdiff_path + '.gz'

    intermediates = (temp_path, raw_bsdiff_path, bzipped_bsdiff_path,
                     gzipped_bsdiff_path, bsdiff_header_path)

    # Clean leftovers from a previous run; gzip and bunzip2 refuse to
    # overwrite existing output files.
    for stale in intermediates + (rebuilt_bsdiff_path,
                                  gzipped_rebuilt_bsdiff_path):
        _remove_if_present(stale)

    try:
        # Create the bsdiff of the two APKs
        subprocess.check_output([bsdiff_path, old_file, new_file, temp_path])

        # Copy the first 32 bytes of the bsdiff file, which is a
        # bsdiff-specific header. The input must be the patch itself
        # (temp_path), not the header file that is being written.
        _run_to_file([head_path, '-c', '32', temp_path],
                     bsdiff_header_path, 'head')

        # Take the remainder of the file and decompress it to gain an
        # uncompressed copy; bunzip2 writes raw_bsdiff_path.
        _run_to_file([tail_path, '-c', '+33', temp_path],
                     bzipped_bsdiff_path, 'tail')
        subprocess.check_output(
            [bunzip2_path, '-d', '-q', bzipped_bsdiff_path])

        # Prepend the 32 bytes of bsdiff header back onto the uncompressed
        # file.
        _run_to_file(['cat', bsdiff_header_path, raw_bsdiff_path],
                     rebuilt_bsdiff_path, 'cat')

        # gzip the patch and get its size.
        subprocess.check_output([gzip_path, '-9', rebuilt_bsdiff_path])
        return os.stat(gzipped_rebuilt_bsdiff_path).st_size
    finally:
        # Clean up, also when one of the steps above failed.
        for leftover in intermediates:
            _remove_if_present(leftover)
        if not save_patch_path:
            _remove_if_present(rebuilt_bsdiff_path)
            _remove_if_present(gzipped_rebuilt_bsdiff_path)
192
193
def calculate_new_apk(new_file, temp_path):
    """Estimates the size the new APK gzipped.

    Args:
      new_file: the new APK file
      temp_path: path of the scratch file that receives the gzipped copy; it
        is removed again before returning.

    Returns:
      the size in bytes of the new APK gzipped

    Raises:
      Exception: if there is a problem calling the binaries needed in the
        process
    """

    # Checks that the OS binaries needed are available
    find_bins_or_die()

    scratch_paths = (temp_path, temp_path + '.gz')
    # Clean temp files
    for path in scratch_paths:
        if os.path.exists(path):
            os.remove(path)

    try:
        # gzip new APK and get its size. '-c' already leaves the input file
        # untouched, so '--keep' (rejected by older gzip releases) is not
        # needed.
        with open(temp_path, 'wb') as gzipped_new_file:
            ret_code = subprocess.call(
                [gzip_path, '-c', '-9', new_file],
                shell=False, stdout=gzipped_new_file)
        if ret_code != 0:
            raise Exception(
                'Problem gzipping the new APK, returned code: %s' % ret_code)
        return os.stat(temp_path).st_size
    finally:
        # Clean up the scratch output, which is written to temp_path itself.
        for path in scratch_paths:
            if os.path.exists(path):
                os.remove(path)
227
228
def calculate_filebyfile(old_file, new_file, save_patch_path, temp_path):
    """Estimates the size the File-by-File patch gzipped.

    Args:
      old_file: the old APK file
      new_file: the new APK file
      save_patch_path: the path including filename to save the generated
        patch. When set, the gzipped patch is kept at
        save_patch_path + '-file-by-file-patch.gz'; when falsy, nothing is
        kept.
      temp_path: path prefix used for scratch files during the process

    Returns:
      the size in bytes of the File-by-File patch gzipped

    Raises:
      Exception: if there is a problem calling the binaries needed in the
        process
    """

    # Checks that the OS binaries needed are available
    find_bins_or_die()
    # Clean temp files
    if os.path.exists(temp_path):
        os.remove(temp_path)

    if save_patch_path:
        filebyfile_patch_path = save_patch_path + '-file-by-file-patch'
    else:
        filebyfile_patch_path = temp_path + '.filebyfile'
    gzipped_filebyfile_patch_path = filebyfile_patch_path + '.gz'
    # gzip refuses to overwrite an existing output file, so clear leftovers.
    for stale in (gzipped_filebyfile_patch_path, filebyfile_patch_path):
        if os.path.exists(stale):
            os.remove(stale)

    # file by file patch
    # We use a jar from https://github.com/andrewhayden/archive-patcher
    # Prefer the jar relative to the current directory (the historical
    # behaviour); otherwise fall back to the copy next to this script so the
    # tool also works when launched from another directory.
    jar_path = 'lib/file-by-file-tools.jar'
    if not os.path.exists(jar_path):
        jar_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'lib', 'file-by-file-tools.jar')

    ret_code = subprocess.call(
        [java_path, '-jar', jar_path, '--generate',
         '--old', old_file, '--new', new_file,
         '--patch', filebyfile_patch_path],
        shell=False)
    if ret_code != 0:
        raise Exception(
            'Problem creating file by file patch, returned code: %s' % ret_code)

    # gzip file by file patch and get its size
    subprocess.check_output([gzip_path, '-9', filebyfile_patch_path])
    gzipped_filebyfile_patch_size = os.stat(
        gzipped_filebyfile_patch_path).st_size

    # Clean temp files
    if os.path.exists(temp_path):
        os.remove(temp_path)
    if not save_patch_path and os.path.exists(gzipped_filebyfile_patch_path):
        os.remove(gzipped_filebyfile_patch_path)
    return gzipped_filebyfile_patch_size
277
278
def main():
    """Parses the command line, computes the estimates and prints a report.

    Prints the on-disk size of the new APK, its gzipped size, and the gzipped
    sizes of the Bsdiff and File-by-File patches from the old APK.

    Raises:
      Exception: if an input file or the temp directory does not exist, or if
        the --save-patch location is not writable or names a directory.
    """
    locale.setlocale(locale.LC_ALL, '')

    parser = argparse.ArgumentParser(
        description='Estimate the sizes of APK patches for Google Play')
    parser.add_argument(
        '--old-file', default=None, required=True,
        help='the path to the "old" file to generate patches from.')
    parser.add_argument(
        '--new-file', default=None, required=True,
        help='the path to the "new" file to generate patches from.')
    parser.add_argument(
        '--save-patch', default=None,
        help='the path prefix to save the generated patches.')
    parser.add_argument(
        '--temp-dir', default='/tmp',
        help='the temp directory to use for patch generation; defaults to /tmp')
    # With no arguments at all, show the full help instead of a usage error.
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()
    args = parser.parse_args()

    if not os.path.isfile(args.old_file):
        raise Exception('File does not exist: %s' % args.old_file)
    if not os.path.isfile(args.new_file):
        raise Exception('File does not exist: %s' % args.new_file)
    if args.save_patch and not os.access(
            os.path.dirname(os.path.abspath(args.save_patch)), os.W_OK):
        raise Exception(
            'The save patch path is not writable: %s' % args.save_patch)
    if args.save_patch and os.path.isdir(args.save_patch):
        raise Exception('Please include the filename in the path: %s'
                        % args.save_patch)
    save_patch_path = args.save_patch
    if not os.path.isdir(args.temp_dir):
        raise Exception('Temp directory does not exist: %s' % args.temp_dir)
    temp_path = os.path.join(args.temp_dir, 'patch.tmp')

    new_file_size = os.stat(args.new_file).st_size

    bsdiff_size = calculate_bsdiff(
        args.old_file, args.new_file, save_patch_path, temp_path)

    gzipped_size = calculate_new_apk(args.new_file, temp_path)

    # Calculate the size of the File-by-File patch gzipped
    gzipped_filebyfile_patch_size = calculate_filebyfile(
        args.old_file, args.new_file, save_patch_path, temp_path)

    def describe(size):
        """Returns (locale-grouped byte count, human readable size)."""
        # locale.format() was removed in Python 3.12; format_string() is the
        # equivalent call and exists on both Python 2 and Python 3.
        return (locale.format_string('%d', size, grouping=True),
                human_file_size(size))

    # Each print call takes a single parenthesised string, which is valid
    # both as a Python 2 print statement and as a Python 3 function call.
    print('\nNew APK size on disk: %s bytes [%s]' % describe(new_file_size))

    print('\nEstimated download size for new installs:')
    print(' Full new APK (gzipped) size: %s bytes [%s]'
          % describe(gzipped_size))

    print('\nEstimated download size for updates from the old APK, '
          'using Bsdiff:')
    print(' Bsdiff patch (gzipped) size: %s bytes [%s]'
          % describe(bsdiff_size))

    print('\nEstimated download size for updates from the old APK,')
    print(' using File-by-File:')
    print(' File-by-File patch (gzipped) size: %s bytes [%s]\n'
          % describe(gzipped_filebyfile_patch_size))
347
348
# Run the estimator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
OLDNEW
« no previous file with comments | « third_party/apk-patch-size-estimator/README.md ('k') | third_party/apk-patch-size-estimator/lib/file-by-file-tools.jar.sha1 » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698