Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 """Tool to diff 2 dex files that have been proguarded. | |
| 5 | |
| 6 To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html | |
| 7 Then use the dextra binary on a classes.dex file like so: | |
| 8 dextra_binary -j -f -m classes.dex > output.dextra | |
| 9 Do this for both the dex files you want to compare. Then, take the appropriate | |
| 10 proguard mapping files used to generate those dex files, and use this script: | |
| 11 python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra | |
| 12 """ | |
| 13 | |
| 14 import re | |
| 15 import sys | |
| 16 | |
|
agrieve
2016/06/13 16:57:01
Two blank lines between top-level functions
smaier
2016/06/13 18:57:16
Done.
| |
def is_new_class(line):
  """Tells whether a mapping-file line opens a new class section.

  Args:
    line: One line of a mapping file, with trailing whitespace already
        removed.

  Returns:
    True if the last character of the line is ':', False otherwise (including
    for an empty line).
  """
  # Slicing instead of indexing keeps the empty string from raising.
  last_character = line[-1:]
  return last_character == ':'
| 19 | |
def parse_mapping_line(line):
  """Parses one line of a proguard mapping file.

  Expects lines like one of these 3:
    'android.support.v8.MenuPopupHelper -> android.support.v8.v:'
    '    android.view.LayoutInflater mInflater -> d'
    '    117:118:void setForceShowIcon(boolean) -> b'
  Those three examples would return
    'android.support.v8.MenuPopupHelper', 'android.support.v8.v'
    'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'
    'void setForceShowIcon(boolean)', 'void b(boolean)'

  Args:
    line: A single mapping line. Surrounding whitespace and a trailing ':'
        are ignored.

  Returns:
    A (original_name, renamed_name) tuple. For members, renamed_name keeps the
    type prefix and the argument list of the original and only swaps the
    member name.

  Raises:
    ValueError: If the line does not contain the ' -> ' separator.
  """
  # Stripping here makes the indented member lines shown above parse the same
  # way as lines the caller has already stripped.
  line = line.strip().rstrip(':')

  # Stripping any line number denotations: a leading 'start:end:' prefix and
  # any ':start' / ':start:end' suffix that follows the argument list.
  line = re.sub(r'\d+:\d+:', '', line)
  line = re.sub(r'\)(?::\d+)+', ')', line)

  original_name, separator, new_name = line.partition(' -> ')
  if not separator:
    raise ValueError('Not a mapping line (missing " -> "): %r' % line)

  # Everything up to and including the first space is the member's type.
  type_string = ''
  space_index = original_name.find(' ')
  if space_index >= 0:
    type_string = original_name[:space_index + 1]

  # Only copy an argument list when both brackets are really present;
  # str.find() returns -1 (which is truthy) when they are not.
  arguments_string = ''
  open_index = original_name.find('(')
  close_index = original_name.find(')')
  if open_index >= 0 and close_index >= 0:
    arguments_string = original_name[open_index:close_index + 1]

  return original_name, type_string + new_name + arguments_string
| 50 | |
def read_mapping_dict(mappingFile):
  """Builds a lookup table from a proguard mapping file.

  Args:
    mappingFile: Iterable of mapping-file lines (e.g. an open file).

  Returns:
    Dict keyed by renamed class name. Each value is a two-item list
    [original_class_name, member_mappings], where member_mappings maps each
    renamed member identifier to its original identifier. An input without
    any mapping lines yields an empty dict.

  Raises:
    ValueError: If a member line appears before the first class line.
  """
  mapping = {}
  # Member table of the class section currently being read; None until the
  # first class line has been seen.
  member_mappings = None
  for line in mappingFile:
    line = line.strip()
    # Blank lines and '#' comment lines carry no mapping information.
    if not line or line.startswith('#'):
      continue

    if is_new_class(line):
      original_class_name, renamed_class_name = parse_mapping_line(line)
      member_mappings = {}
      # The entry shares member_mappings, so it can be stored right away and
      # filled in while the following member lines are read.
      mapping[renamed_class_name] = [original_class_name, member_mappings]
    elif member_mappings is None:
      raise ValueError('Member mapping found before any class: %r' % line)
    else:
      original_member_name, renamed_member_name = parse_mapping_line(line)
      member_mappings[renamed_member_name] = original_member_name

  return mapping
| 70 | |
def strip_comments(string):
  """Removes Java-style comments from a string.

  Block comments are removed first, including ones spanning several lines,
  then line comments up to the end of each line. The patterns know nothing
  about string literals, so comment markers inside quotes are removed too.

  Args:
    string: Text to clean up; may contain several lines.

  Returns:
    The text with all comments removed. Line breaks that follow a line
    comment are kept.
  """
  # Remove all occurrences of multiline comments (/*COMMENT*/)
  string = re.sub(r'/\*.*?\*/', '', string, flags=re.DOTALL)
  # Remove all occurrences of single line comments (//COMMENT). MULTILINE lets
  # '$' match at the end of every line, so comments on lines other than the
  # last one are stripped as well.
  string = re.sub(r'//.*?$', '', string, flags=re.MULTILINE)
  return string
| 77 | |
| 78 | |
def strip_quotes(string):
  """Removes quoted substrings, quotes included, from a string.

  Double-quoted spans are removed first, then single-quoted spans. Quotes are
  paired from left to right; an unpaired trailing quote is left in place.

  Args:
    string: Text that may contain quoted substrings.

  Returns:
    The text without its paired quoted spans.
  """
  without_double_quoted = re.sub(r'"[^"]*"', '', string)
  return re.sub(r"'[^']*'", '', without_double_quoted)
| 93 | |
| 94 | |
def remove_qualifiers(string_tokens):
  """Drops the leading qualifier keywords from a list of tokens.

  Args:
    string_tokens: List of tokens for one line.

  Returns:
    The tokens starting at the first one that is not a qualifier. Qualifier
    keywords appearing after that point are kept.
  """
  qualifiers = ('public', 'protected', 'private', 'final', 'static',
                'abstract', 'volatile', 'native', 'enum')
  # Count how many tokens at the front are qualifiers, then slice once.
  first_kept = 0
  for token in string_tokens:
    if token not in qualifiers:
      break
    first_kept += 1
  return string_tokens[first_kept:]
| 101 | |
def get_line_tokens(line):
  """Splits a line into tokens, ignoring comments and leading qualifiers.

  Args:
    line: One line of text.

  Returns:
    List of tokens left after comments are stripped and leading qualifier
    keywords are removed.
  """
  # A token is a run of word characters (alphanumerics and underscore) plus
  # '$', '.', '<', '>', '{', '}', '[' and ']'.
  token_pattern = r'[\w\$\.<>\{\}\[\]]+'
  code_only = strip_comments(line)
  return remove_qualifiers(re.findall(token_pattern, code_only))
| 108 | |
| 109 | |
def is_class_definition(line_tokens):
  """Tells whether a tokenized line starts a class declaration.

  Args:
    line_tokens: List of string tokens for one line.

  Returns:
    True when the first token is 'class'; False otherwise, including for an
    empty token list.
  """
  # bool() keeps an empty list from being returned as the result.
  return bool(line_tokens) and line_tokens[0] == 'class'
| 112 | |
| 113 | |
def is_end_of_class_definition(line_tokens):
  """Tells whether a tokenized line ends with an opening brace.

  Args:
    line_tokens: List of string tokens for one line.

  Returns:
    True when the last token is '{'; False otherwise, including for an empty
    token list.
  """
  # bool() keeps an empty list from being returned as the result.
  return bool(line_tokens) and line_tokens[-1] == '{'
| 116 | |
| 117 | |
def is_end_of_class(line_tokens):
  """Tells whether a tokenized line ends with a closing brace.

  Args:
    line_tokens: List of string tokens for one line.

  Returns:
    True when the last token is '}'; False otherwise, including for an empty
    token list.
  """
  # bool() keeps an empty list from being returned as the result.
  return bool(line_tokens) and line_tokens[-1] == '}'
| 120 | |
| 121 | |
def type_lookup(renamed_type, mapping_dict):
  """Translates a possibly renamed type back to its original name.

  Args:
    renamed_type: Type name as it appears in the dex dump, optionally with
        array brackets (e.g. 'a[]').
    mapping_dict: Dict keyed by renamed type name whose values hold the
        original name at index 0.

  Returns:
    The original type name when the bracket-free name is a key of
    mapping_dict, otherwise the bracket-free name itself; in both cases
    followed by the brackets that were set aside.
  """
  base_type = renamed_type.strip('[]')
  # Whatever is left once the base name is removed is the bracket part.
  array_part = renamed_type.replace(base_type, '')
  real_type = (mapping_dict[base_type][0] if base_type in mapping_dict
               else base_type)
  return real_type + array_part
| 132 | |
| 133 | |
def get_member_identifier(line_tokens, mapping_dict, renamed_class_name,
                          is_function):
  """Looks up the original identifier of a class member from a dex dump line.

  Args:
    line_tokens: Tokens of the member definition; the first is the type, the
        second the member name, the remaining ones are treated as argument
        types when is_function is set.
    mapping_dict: Dict keyed by renamed class name with values of the form
        [original_class_name, member_mappings].
    renamed_class_name: Renamed name of the class owning the member; must be
        a key of mapping_dict.
    is_function: Whether the line defines a function, in which case an
        argument list is made part of the lookup key.

  Returns:
    The value stored in the class's member mapping for the rebuilt key: the
    real type + real identifier + any real function args (if applicable).

  Raises:
    KeyError: If the rebuilt key is not in the class's member mapping.
        Diagnostics are printed before the error is raised.
  """
  assert len(line_tokens) > 1
  assert renamed_class_name in mapping_dict
  mapping_entry = mapping_dict[renamed_class_name][1]

  real_type = type_lookup(line_tokens[0], mapping_dict)
  # Keep only the name if an initializer is glued to it ('name=value').
  renamed_name_token = line_tokens[1].partition('=')[0]

  function_args = ''
  if is_function:
    function_args = '(%s)' % ','.join(
        type_lookup(token, mapping_dict) for token in line_tokens[2:])

  renamed_member_identifier = '%s %s%s' % (real_type, renamed_name_token,
                                           function_args)
  if renamed_member_identifier not in mapping_entry:
    # Single-argument print() calls behave the same on Python 2 and 3.
    print('Proguarded class which caused the issue: %s' % renamed_class_name)
    print('Key supposed to be in this dict: %s' % (mapping_entry,))
    print('Definition line tokens: %s' % (line_tokens,))

  # This will be the real type + real_identifier + any real function args (if
  # applicable)
  return mapping_entry[renamed_member_identifier]
| 166 | |
def get_class_names(line_tokens, mapping_dict):
  """Extracts the renamed and original class names from a class line.

  Args:
    line_tokens: Tokens of a class definition line; the second token is the
        renamed class name.
    mapping_dict: Dict keyed by renamed class name whose values hold the
        original class name at index 0.

  Returns:
    A (renamed_class_name, original_class_name) tuple.
  """
  assert len(line_tokens) > 1
  renamed_class_name = line_tokens[1]
  assert renamed_class_name in mapping_dict
  original_class_name = mapping_dict[renamed_class_name][0]
  return renamed_class_name, original_class_name
| 171 | |
| 172 | |
def is_line_function_definition(line):
  """Tells whether a line looks like a function definition.

  Comments and quoted text are removed before looking for brackets, so
  brackets inside them do not count.

  Args:
    line: One line of text.

  Returns:
    True if both '(' and ')' first occur after the first character of the
    cleaned-up line, False otherwise.
  """
  code_only = strip_quotes(strip_comments(line))
  open_index = code_only.find('(')
  close_index = code_only.find(')')
  return open_index > 0 and close_index > 0
| 177 | |
def build_mapped_dex_dict(dextra_file, mapping_dict):
  """Builds a dictionary of class members from dextra output.

  Expects data from dextra -j -m -f.

  Args:
    dextra_file: Iterable of lines of dextra output (e.g. an open file).
    mapping_dict: Dict keyed by renamed class name with values of the form
        [original_class_name, member_mappings]. It is not modified.

  Returns:
    Dictionary mapping class name to list of members, both expressed with the
    original names taken from mapping_dict.
  """
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some reason
  # dextra shortens boolean to bool. The entry goes into a shallow copy so the
  # caller's dictionary is left untouched.
  mapping_dict = dict(mapping_dict)
  mapping_dict['bool'] = ['boolean', {}]
  dex_dict = {}
  current_entry = []
  reading_class_header = True
  unmatched_string = False

  for line in dextra_file:
    # Accounting for multi line strings: a line with an odd number of double
    # quotes opens or closes a string, and everything in between is skipped.
    if line.count('"') % 2:
      unmatched_string = not unmatched_string
      continue
    if unmatched_string:
      continue

    line_tokens = get_line_tokens(line)
    if is_class_definition(line_tokens):
      reading_class_header = True
      renamed_class_name, real_class_name = get_class_names(line_tokens,
                                                            mapping_dict)
    # The header is over once a line ends with '{'; this may be the class
    # line itself or a later line.
    if is_end_of_class_definition(line_tokens):
      reading_class_header = False
      continue
    if is_end_of_class(line_tokens):
      dex_dict[real_class_name] = current_entry
      current_entry = []
      continue

    # Any other non-empty line inside the class body is taken as a member.
    if not reading_class_header and line_tokens:
      is_function = is_line_function_definition(line)
      member = get_member_identifier(line_tokens, mapping_dict,
                                     renamed_class_name, is_function)
      current_entry.append(member)

  return dex_dict
| 217 | |
| 218 | |
def diff_dex_dicts(dex_base, dex_new):
  """Computes a textual diff between two class-to-members dictionaries.

  Args:
    dex_base: Dict mapping class name to an iterable of member identifiers.
    dex_new: Dict of the same shape to compare against dex_base.
        Neither dictionary is modified.

  Returns:
    List of strings. Classes of dex_base come first, sorted by name: a class
    whose members differ yields its name followed by one '- member' line per
    member only in dex_base and one '+ member' line per member only in
    dex_new (each group sorted); a class missing from dex_new yields
    '-class NAME'. Classes found only in dex_new follow as '+class NAME',
    sorted by name. Classes with identical members produce nothing.
  """
  diffs = []
  for key in sorted(dex_base):
    if key not in dex_new:
      # Class not found in new
      diffs.append('-class ' + key)
      continue

    # Class in both
    base_members = set(dex_base[key])
    new_members = set(dex_new[key])
    if base_members == new_members:
      continue

    diff_lines = [key]
    # Base has stuff the new one doesn't
    diff_lines.extend(
        '- ' + member for member in sorted(base_members - new_members))
    # New has stuff the base one doesn't
    diff_lines.extend(
        '+ ' + member for member in sorted(new_members - base_members))
    diffs.append('\n'.join(diff_lines))

  # Classes in new that have no counterpart in base
  diffs.extend(
      '+class ' + key for key in sorted(dex_new) if key not in dex_base)
  return diffs
| 255 | |
| 256 | |
def main(unused_argv):
  """Diffs two dextra dumps using their proguard mapping files.

  Prints one entry per differing class to standard output.

  Args:
    unused_argv: Command-line arguments without the program name, in the
        order mappingfile1, output1.dextra, mappingfile2, output2.dextra.
        Despite its name, this argument is used.
  """
  # Explicit check instead of assert, which is skipped when running with -O.
  if len(unused_argv) != 4:
    sys.exit('Usage: python dexdiffer.py mappingfile1 output1.dextra '
             'mappingfile2 output2.dextra')

  (base_mapping_path, base_dextra_path,
   new_mapping_path, new_dextra_path) = unused_argv

  with open(base_mapping_path, "r") as f:
    mapping_base = read_mapping_dict(f)
  with open(base_dextra_path, "r") as f:
    dex_base = build_mapped_dex_dict(f, mapping_base)
  with open(new_mapping_path, "r") as f:
    mapping_new = read_mapping_dict(f)
  with open(new_dextra_path, "r") as f:
    dex_new = build_mapped_dex_dict(f, mapping_new)

  for diff in diff_dex_dicts(dex_base, dex_new):
    # Single-argument print() behaves the same on Python 2 and 3.
    print(diff)
| 273 | |
| 274 | |
# Run the diff only when this file is executed as a script; the program name
# is dropped from the arguments before they are handed to main().
if __name__ == '__main__':
  main(sys.argv[1:])
| 277 | |
| OLD | NEW |