| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/python |
| 2 # |
| 3 # Copyright 2016 Google Inc. All Rights Reserved. |
| 4 # |
| 5 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 # you may not use this file except in compliance with the License. |
| 7 # You may obtain a copy of the License at |
| 8 # |
| 9 # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 # |
| 11 # Unless required by applicable law or agreed to in writing, software |
| 12 # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 # See the License for the specific language governing permissions and |
| 15 # limitations under the License. |
| 16 |
| 17 """Estimates the size of Google Play patches and the new gzipped APK. |
| 18 |
| 19 From two APKs it estimates the size of new patches as well as |
| 20 the size of a gzipped version of the APK, which would be used in |
| 21 cases where patches are unexpectedly large, unavailable, or unsuitable. |
| 22 Google Play uses multiple techniques to generate patches and generally picks |
| 23 the best match for the device. The best match is usually, but not always, the |
| 24 smallest patch file produced. The numbers that this script produces are |
| 25 ESTIMATES that can be used to characterize the impact of arbitrary changes to |
| 26 APKs. There is NO GUARANTEE that this tool produces the same patches or patch |
| 27 sizes that Google Play generates, stores or transmits, and the actual |
| 28 implementation within Google Play may change at any time, without notice. |
| 29 |
| 30 """ |
| 31 |
| 32 import sys |
| 33 import argparse |
| 34 import locale |
| 35 import math |
| 36 import os |
| 37 import subprocess |
| 38 |
# Cached absolute paths of the external tools this script shells out to.
# Each one stays None until find_bins_or_die() resolves it on first use.
bsdiff_path = gzip_path = head_path = None
tail_path = bunzip2_path = java_path = None
| 45 |
| 46 |
def find_bins_or_die():
    """Resolves and caches the path of every external binary the script uses.

    The binaries looked up are bsdiff, gzip, head, tail, bunzip2 and java.
    A path that is already set in its module-level variable is kept as is,
    so the lookup happens at most once per binary.

    Raises:
      Exception: propagated from find_binary() when a binary cannot be found.
    """
    global bsdiff_path, gzip_path, head_path
    global tail_path, bunzip2_path, java_path

    # "x = x or lookup" only performs the lookup while x is still unset.
    bsdiff_path = bsdiff_path or find_binary('bsdiff')
    gzip_path = gzip_path or find_binary('gzip')
    head_path = head_path or find_binary('head')
    tail_path = tail_path or find_binary('tail')
    bunzip2_path = bunzip2_path or find_binary('bunzip2')
    java_path = java_path or find_binary('java')
| 72 |
| 73 |
def find_binary(binary_name):
    """Finds the path of a binary by asking the `which` command.

    Args:
      binary_name: the name of the executable to look up on PATH.

    Returns:
      the path printed by `which`, as text with surrounding whitespace removed.

    Raises:
      Exception: if the binary is not on PATH, or if `which` itself cannot be
        executed.
    """
    try:
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3, so the result is a str on every interpreter.
        output = subprocess.check_output(
            ['which', binary_name], universal_newlines=True)
    except subprocess.CalledProcessError:
        raise Exception(
            'No "' + binary_name + '" on PATH, please install or fix PATH.')
    except OSError as error:
        # Raised when `which` is missing or not executable.
        raise Exception(
            'Could not run "which" to locate "%s": %s' % (binary_name, error))
    return output.strip()
| 82 |
| 83 |
def human_file_size(size):
    """Converts a byte size number into a human readable value.

    The magnitude of `size` is expressed in the largest binary unit (1024
    based) that keeps the number at or above 1, using up to three significant
    digits, e.g. 1536 -> '1.5KB'.

    Args:
      size: a number of bytes; the sign is ignored.

    Returns:
      a string made of the scaled number immediately followed by its unit.
    """
    size = abs(size)
    if size == 0:
        return '0B'
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']

    # Repeated division by 1024 is exact in binary floating point, unlike
    # math.log(size, 2), which can land just off an integer at unit
    # boundaries. Stopping at the last unit avoids running off the list.
    value = float(size)
    unit_index = 0
    while value >= 1024 and unit_index < len(units) - 1:
        value /= 1024
        unit_index += 1

    # '%.3g' switches to exponent notation once four integer digits are
    # needed (1000 -> '1e+03'), so print those values as plain integers.
    if value >= 999.5:
        return '%d%s' % (round(value), units[unit_index])
    return '%.3g%s' % (value, units[unit_index])
| 93 |
| 94 |
def _remove_if_exists(path):
    """Deletes the file at `path` when it exists; does nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def _run_to_file(command, output_path, step_name):
    """Runs `command` with its stdout written to `output_path`.

    Raises an Exception naming `step_name` when the command exits non-zero.
    """
    # Binary mode: the commands emit raw patch bytes. The with-block closes
    # the handle even if launching the process fails.
    with open(output_path, 'wb') as output_file:
        ret_code = subprocess.Popen(
            command, shell=False, stdout=output_file).wait()
    if ret_code != 0:
        raise Exception(
            'Problem at the %s step, returned code: %s' % (step_name, ret_code))


def calculate_bsdiff(old_file, new_file, save_patch_path, temp_path):
    """Estimates the size the Bsdiff patch gzipped.

    Args:
      old_file: the old APK file
      new_file: the new APK file
      save_patch_path: the path including filename to save the generated patch,
        or a false value to discard the patch. When set, the gzipped patch is
        left at save_patch_path + '-bsdiff-patch.gz'.
      temp_path: the path of a scratch file; the intermediate files are created
        at this path and at paths derived from it by appending suffixes.

    Returns:
      the size in bytes of the gzipped bsdiff patch between the two APKs

    Raises:
      Exception: if there is a problem calling the binaries needed in the process
    """

    # Oddities:
    # Bsdiff forces bzip2 compression, which starts after byte 32. Bzip2 isn't
    # necessarily the best choice in all cases, and isn't necessarily what
    # Google Play uses, so it has to be uncompressed and rewritten with gzip.

    # Checks that the OS binaries needed are available
    find_bins_or_die()

    raw_bsdiff_path = temp_path + '.raw_bsdiff'
    bzipped_bsdiff_path = raw_bsdiff_path + '.bz2'
    bsdiff_header_path = temp_path + '.raw_bsdiff_header'
    if save_patch_path:
        rebuilt_bsdiff_path = save_patch_path + '-bsdiff-patch'
    else:
        rebuilt_bsdiff_path = raw_bsdiff_path + '.rebuilt'
    gzipped_rebuilt_bsdiff_path = rebuilt_bsdiff_path + '.gz'

    # Files that must never outlive this call, whatever the outcome.
    intermediate_paths = [
        temp_path,
        raw_bsdiff_path,
        bzipped_bsdiff_path,
        bsdiff_header_path,
        rebuilt_bsdiff_path,
    ]

    # Clean leftovers from a previous run; bunzip2 and gzip refuse to
    # overwrite an existing output file.
    for path in intermediate_paths + [gzipped_rebuilt_bsdiff_path]:
        _remove_if_exists(path)

    try:
        # Create the bsdiff of the two APKs
        subprocess.check_output([bsdiff_path, old_file, new_file, temp_path])

        # Save the first 32 bytes of the bsdiff file, which is a
        # bsdiff-specific header. The input must be the patch itself
        # (temp_path), not the header file that is being written.
        _run_to_file(
            [head_path, '-c', '32', temp_path], bsdiff_header_path, 'head')

        # Take the remainder of the file and uncompress it. bunzip2 writes
        # raw_bsdiff_path (the name without '.bz2').
        _run_to_file(
            [tail_path, '-c', '+33', temp_path], bzipped_bsdiff_path, 'tail')
        subprocess.check_output(
            [bunzip2_path, '-d', '-q', bzipped_bsdiff_path])

        # Prepend the 32 bytes of bsdiff header back onto the uncompressed
        # file.
        _run_to_file(
            ['cat', bsdiff_header_path, raw_bsdiff_path],
            rebuilt_bsdiff_path, 'cat')

        # gzip the patch and get its size.
        subprocess.check_output([gzip_path, '-9', rebuilt_bsdiff_path])
        return os.stat(gzipped_rebuilt_bsdiff_path).st_size
    finally:
        # Clean up, also when one of the steps above failed.
        for path in intermediate_paths:
            _remove_if_exists(path)
        if not save_patch_path:
            _remove_if_exists(gzipped_rebuilt_bsdiff_path)
| 192 |
| 193 |
def calculate_new_apk(new_file, temp_path):
    """Estimates the size the new APK gzipped.

    Args:
      new_file: the new APK file
      temp_path: the path of a scratch file that receives the gzipped copy; it
        is removed again before returning.

    Returns:
      the size in bytes of the new APK gzipped

    Raises:
      Exception: if there is a problem calling the binaries needed in the process
    """

    # Checks that the OS binaries needed are available
    find_bins_or_die()

    try:
        # gzip new APK and get its size. '-c' streams the result to stdout
        # and leaves new_file untouched, so no '--keep' flag is needed (older
        # gzip builds do not know that flag).
        with open(temp_path, 'wb') as gzipped_new_file:
            ret_code = subprocess.Popen(
                [gzip_path, '-c', '-9', new_file],
                shell=False, stdout=gzipped_new_file).wait()
        if ret_code != 0:
            raise Exception(
                'Problem gzipping the new APK, returned code: %s' % ret_code)
        return os.stat(temp_path).st_size
    finally:
        # Clean up the file actually written above (temp_path itself, not
        # temp_path + '.gz').
        if os.path.exists(temp_path):
            os.remove(temp_path)
| 227 |
| 228 |
def calculate_filebyfile(old_file, new_file, save_patch_path, temp_path):
    """Estimates the gzipped size of the File-by-File patch.

    Args:
      old_file: the old APK file
      new_file: the new APK file
      save_patch_path: the path including filename to save the generated patch;
        when set, the gzipped patch is kept at
        save_patch_path + '-file-by-file-patch.gz'.
      temp_path: scratch path; when the patch is not saved it is written to
        temp_path + '.filebyfile' and deleted afterwards.

    Returns:
      the size in bytes of the gzipped File-by-File patch

    Raises:
      Exception: if there is a problem calling the binaries needed in the process
    """

    # Make sure every external tool has been located.
    find_bins_or_die()

    patch_path = (save_patch_path + '-file-by-file-patch' if save_patch_path
                  else temp_path + '.filebyfile')
    gzipped_patch_path = patch_path + '.gz'

    # Remove leftovers from earlier runs before generating anything.
    for stale_path in (temp_path, gzipped_patch_path, patch_path):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    # The patch is produced by a jar from
    # https://github.com/andrewhayden/archive-patcher
    generate_command = [
        java_path, '-jar', 'lib/file-by-file-tools.jar', '--generate',
        '--old', old_file, '--new', new_file, '--patch', patch_path,
    ]
    exit_status = subprocess.Popen(generate_command, shell=False).wait()
    if exit_status != 0:
        raise Exception(
            'Problem creating file by file patch, returned code: %s'
            % exit_status)

    # Compress the patch; gzip replaces patch_path with patch_path + '.gz'.
    subprocess.check_output([gzip_path, '-9', patch_path])
    patch_size = os.stat(gzipped_patch_path).st_size

    # Tidy up; the gzipped patch survives only when the caller asked for it.
    if os.path.exists(temp_path):
        os.remove(temp_path)
    if not save_patch_path and os.path.exists(gzipped_patch_path):
        os.remove(gzipped_patch_path)
    return patch_size
| 277 |
| 278 |
def main():
    """Parses the command line, runs every estimate and prints a report.

    Raises:
      Exception: if an input file or the temp directory does not exist, if the
        save-patch location is not usable, or if one of the estimation steps
        fails.
    """
    locale.setlocale(locale.LC_ALL, '')

    parser = argparse.ArgumentParser(
        description='Estimate the sizes of APK patches for Google Play')
    parser.add_argument(
        '--old-file', default=None, required=True,
        help='the path to the "old" file to generate patches from.')
    parser.add_argument(
        '--new-file', default=None, required=True,
        help='the path to the "new" file to generate patches from.')
    parser.add_argument(
        '--save-patch', default=None,
        help='the path prefix to save the generated patches.')
    parser.add_argument(
        '--temp-dir', default='/tmp',
        help='the temp directory to use for patch generation; defaults to /tmp')
    # With no arguments at all, show the full help instead of a usage error.
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()
    args = parser.parse_args()

    if not os.path.isfile(args.old_file):
        raise Exception('File does not exist: %s' % args.old_file)
    if not os.path.isfile(args.new_file):
        raise Exception('File does not exist: %s' % args.new_file)
    if args.save_patch and not os.access(
            os.path.dirname(os.path.abspath(args.save_patch)), os.W_OK):
        raise Exception(
            'The save patch path is not writable: %s' % args.save_patch)
    if args.save_patch and os.path.isdir(args.save_patch):
        raise Exception('Please include the filename in the path: %s'
                        % args.save_patch)
    save_patch_path = args.save_patch
    if not os.path.isdir(args.temp_dir):
        raise Exception('Temp directory does not exist: %s' % args.temp_dir)
    temp_path = os.path.join(args.temp_dir, 'patch.tmp')

    new_file_size = os.stat(args.new_file).st_size

    bsdiff_size = calculate_bsdiff(
        args.old_file, args.new_file, save_patch_path, temp_path)

    gzipped_size = calculate_new_apk(args.new_file, temp_path)

    # Calculate the size of the File-by-File patch gzipped
    gzipped_filebyfile_patch_size = calculate_filebyfile(
        args.old_file, args.new_file, save_patch_path, temp_path)

    # locale.format_string replaces locale.format, which newer Python
    # releases no longer provide. Each print takes one parenthesised
    # argument so the statement behaves the same on Python 2 and Python 3.
    print('\nNew APK size on disk: %s bytes [%s]'
          % (locale.format_string('%d', new_file_size, grouping=True),
             human_file_size(new_file_size)))

    print('\nEstimated download size for new installs:')
    print(' Full new APK (gzipped) size:'
          ' %s bytes [%s]'
          % (locale.format_string('%d', gzipped_size, grouping=True),
             human_file_size(gzipped_size)))

    print('\nEstimated download size for updates from the old APK,'
          ' using Bsdiff:')
    print(' Bsdiff patch (gzipped) size: %s bytes [%s]'
          % (locale.format_string('%d', bsdiff_size, grouping=True),
             human_file_size(bsdiff_size)))

    print('\nEstimated download size for updates from the old APK,')
    print(' using File-by-File:')
    print(' File-by-File patch (gzipped) size: %s bytes [%s]\n'
          % (locale.format_string(
              '%d', gzipped_filebyfile_patch_size, grouping=True),
             human_file_size(gzipped_filebyfile_patch_size)))
| 347 |
| 348 |
# Run the estimator only when this file is executed as a script.
if __name__ == '__main__':
    main()
| OLD | NEW |