Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 """Tool to diff 2 dex files that have been proguarded. | |
| 5 | |
| 6 To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html | |
| 7 Then use the dextra binary on a classes.dex file like so: | |
| 8 dextra_binary -j -f -m classes.dex > output.dextra | |
| 9 Do this for both the dex files you want to compare. Then, take the appropriate | |
| 10 proguard mapping files used to generate those dex files, and use this script: | |
| 11 python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra | |
| 12 """ | |
| 13 | |
| 14 import argparse | |
| 15 import re | |
| 16 import sys | |
| 17 | |
| 18 | |
| 19 _QUALIFIERS = set(['public', 'protected', 'private', 'final', 'static', | |
| 20 'abstract', 'volatile', 'native', 'enum']) | |
| 21 | |
| 22 | |
| 23 def _IsNewClass(line): | |
| 24 return line.endswith(':') | |
| 25 | |
| 26 | |
| 27 # Expects lines like one of these 3: | |
| 28 # 'android.support.v8.MenuPopupHelper -> android.support.v8.v:' | |
| 29 # ' android.view.LayoutInflater mInflater -> d' | |
| 30 # ' 117:118:void setForceShowIcon(boolean) -> b' | |
| 31 # Those three examples would return | |
| 32 # 'android.support.v8.MenuPopupHelper', 'android.support.v8.v' | |
| 33 # 'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d' | |
| 34 # 'void setForceShowIcon(boolean)', 'void b(boolean)' | |
| 35 def _ParseMappingLine(line): | |
| 36 line = line.rstrip(':') | |
| 37 | |
| 38 # Stripping any line number denotations | |
| 39 line = re.sub(r'\d+:\d+:', '', line) | |
| 40 line = re.sub(r'\):\d+', ')', line) | |
| 41 | |
| 42 original_name, new_name = line.split(' -> ') | |
| 43 | |
| 44 type_string = '' | |
| 45 if ' ' in original_name: | |
| 46 type_string = original_name[:original_name.find(' ') + 1] | |
| 47 | |
| 48 arguments_string = '' | |
| 49 match = re.search(r'(\(.*?\))', original_name) | |
| 50 if match: | |
| 51 arguments_string = match.group(1) | |
| 52 | |
| 53 return original_name, type_string + new_name + arguments_string | |
| 54 | |
| 55 | |
def _ReadMappingDict(mapping_file):
  """Builds a renamed-name to original-name lookup from a mapping file.

  Args:
    mapping_file: An iterable of mapping file lines, e.g. an open file.

  Returns:
    A dict keyed by renamed class name. Each value is a two item list
    [original_class_name, member_mappings], where member_mappings maps a
    renamed member string to the original member string, both in the form
    returned by _ParseMappingLine(). An input without any class line yields
    an empty dict.

  Raises:
    ValueError: if a member line appears before the first class line, or a
      line cannot be parsed by _ParseMappingLine().
  """
  mapping = {}
  member_mappings = None
  for line in mapping_file:
    line = line.strip()
    # Blank lines and '#' comment lines carry no mapping information.
    if not line or line.startswith('#'):
      continue

    if _IsNewClass(line):
      original_class_name, renamed_class_name = _ParseMappingLine(line)
      # The entry is registered right away; member lines that follow fill in
      # the same dict object.
      member_mappings = {}
      mapping[renamed_class_name] = [original_class_name, member_mappings]
    else:
      if member_mappings is None:
        raise ValueError('Member line found before any class line: %r' % line)
      original_member_name, renamed_member_name = _ParseMappingLine(line)
      member_mappings[renamed_member_name] = original_member_name

  return mapping
| 75 | |
| 76 | |
| 77 def _StripComments(string): | |
| 78 # Remove all occurances of multiline comments (/*COMMENT*/) | |
| 79 string = re.sub(r'/\*.*?\*/', "", string, flags=re.DOTALL) | |
| 80 # Remove all occurances of single line comments (//COMMENT) | |
| 81 string = re.sub(r'//.*?$', "", string) | |
| 82 return string | |
| 83 | |
| 84 | |
| 85 def _StripQuotes(string): | |
| 86 return re.sub(r'([\'"]).*?\1', '', string) | |
| 87 | |
| 88 | |
def _RemoveQualifiers(string_tokens):
  """Returns |string_tokens| minus its leading run of _QUALIFIERS entries.

  Only qualifiers at the front are dropped; the input is returned untouched
  when it is empty or does not start with a qualifier.
  """
  leading = 0
  for token in string_tokens or ():
    if token not in _QUALIFIERS:
      break
    leading += 1
  return string_tokens[leading:] if leading else string_tokens
| 93 | |
| 94 | |
def _GetLineTokens(line):
  """Tokenizes |line| after dropping comments and leading qualifiers.

  Args:
    line: One line of text.

  Returns:
    The list of tokens found in the comment-free line, as filtered by
    _RemoveQualifiers().
  """
  code = _StripComments(line)
  # A token is a maximal run of word characters (\w) together with any of
  # '$', '.', '<', '>', '{', '}', '[' and ']'.
  token_pattern = r'[\w\$\.<>\{\}\[\]]+'
  return _RemoveQualifiers(re.findall(token_pattern, code))
| 101 | |
| 102 | |
| 103 def _IsClassDefinition(line_tokens): | |
| 104 return line_tokens and line_tokens[0] == 'class' | |
| 105 | |
| 106 | |
| 107 def _IsEndOfClass_definition(line_tokens): | |
| 108 return line_tokens and line_tokens[-1] == '{' | |
| 109 | |
| 110 | |
| 111 def _IsEndOfClass(line_tokens): | |
| 112 return line_tokens and line_tokens[-1] == '}' | |
| 113 | |
| 114 | |
| 115 def _TypeLookup(renamed_type, mapping_dict): | |
| 116 renamed_type_stripped = renamed_type.strip('[]') | |
| 117 postfix = renamed_type.replace(renamed_type_stripped, '') | |
| 118 | |
| 119 if renamed_type_stripped in mapping_dict: | |
| 120 real_type = mapping_dict[renamed_type_stripped][0] | |
| 121 else: | |
| 122 real_type = renamed_type_stripped | |
| 123 | |
| 124 return real_type + postfix | |
| 125 | |
| 126 | |
def _GetMemberIdentifier(line_tokens, mapping_dict, renamed_class_name,
                         is_function):
  """Returns the original form of the member described by |line_tokens|.

  Args:
    line_tokens: Tokens of a member line: the renamed type, the renamed name
      and, when |is_function| is true, the renamed argument types.
    mapping_dict: Dict keyed by renamed class name whose values are
      [original_class_name, member_mappings] lists.
    renamed_class_name: Key in |mapping_dict| of the class that declares the
      member.
    is_function: Whether the tokens after the name are argument types.

  Returns:
    The value stored in the class's member mappings for this member: the real
    type, the real identifier and any real function arguments.

  Raises:
    KeyError: if the rebuilt member identifier is not in the class's member
      mappings. Diagnostics are written to stderr first.
  """
  assert len(line_tokens) > 1
  assert renamed_class_name in mapping_dict
  mapping_entry = mapping_dict[renamed_class_name][1]

  real_type = _TypeLookup(line_tokens[0], mapping_dict)
  # Keep only the part of the name token that precedes any '='.
  renamed_name_token = line_tokens[1].partition('=')[0]

  function_args = ''
  if is_function:
    real_arg_types = [_TypeLookup(token, mapping_dict)
                      for token in line_tokens[2:]]
    function_args = '(' + ','.join(real_arg_types) + ')'

  renamed_member_identifier = (real_type + ' ' + renamed_name_token
                               + function_args)
  if renamed_member_identifier not in mapping_entry:
    # Diagnostics go to stderr so they never mix with the diff on stdout.
    sys.stderr.write('Proguarded class which caused the issue: %s\n'
                     % (renamed_class_name,))
    sys.stderr.write('Missing key: %s\n' % (renamed_member_identifier,))
    sys.stderr.write('Key supposed to be in this dict: %s\n'
                     % (mapping_entry,))
    sys.stderr.write('Definition line tokens: %s\n' % (line_tokens,))
    raise KeyError(renamed_member_identifier)

  # This will be the real type + real_identifier + any real function args (if
  # applicable)
  return mapping_entry[renamed_member_identifier]
| 158 | |
| 159 | |
| 160 def _GetClassNames(line_tokens, mapping_dict): | |
| 161 assert len(line_tokens) > 1 | |
| 162 assert line_tokens[1] in mapping_dict | |
| 163 return line_tokens[1], mapping_dict[line_tokens[1]][0] | |
| 164 | |
| 165 | |
def _IsLineFunctionDefinition(line):
  """Returns True when |line| has both '(' and ')' outside comments/quotes.

  Each parenthesis must first occur somewhere after the first character of
  the line once comments and quoted literals have been removed.
  """
  code = _StripQuotes(_StripComments(line))
  has_open_paren = code.find('(') > 0
  has_close_paren = code.find(')') > 0
  return has_open_paren and has_close_paren
| 170 | |
| 171 | |
# Expects data from dextra -j -m -f
# Returns dictionary mapping class name to list of members
def _BuildMappedDexDict(dextra_file, mapping_dict):
  """Collects the un-renamed members of every class in a dextra dump.

  NOTE(review): block nesting below was reconstructed from a flattened
  listing - confirm it against the original file.

  Args:
    dextra_file: An iterable of lines, e.g. an open file.
    mapping_dict: Dict keyed by renamed class name whose values are
      [original_class_name, member_mappings] lists. A 'bool' entry is added
      to it in place.

  Returns:
    A dict mapping each real class name to the list of values returned by
    _GetMemberIdentifier() for the member lines of that class.
  """
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some reason
  # dextra shortens boolean to bool.
  mapping_dict['bool'] = ['boolean', {}]
  dex_dict = {}
  current_entry = []
  # While True, lines are not treated as members; cleared by a line whose last
  # token is '{'.
  reading_class_header = True
  # True while between two lines that each hold an odd number of '"'.
  unmatched_string = False

  for line in dextra_file:
    # Accounting for multi line strings
    if line.count('"') % 2:
      unmatched_string = not unmatched_string
      continue
    if unmatched_string:
      continue

    line_tokens = _GetLineTokens(line)
    if _IsClassDefinition(line_tokens):
      reading_class_header = True
      renamed_class_name, real_class_name = _GetClassNames(line_tokens,
                                                           mapping_dict)
    # Not an elif: a class line may itself end with '{'.
    if _IsEndOfClass_definition(line_tokens):
      reading_class_header = False
      continue
    if _IsEndOfClass(line_tokens):
      # Store the members gathered so far under the class's real name and
      # start a fresh list for the next class.
      dex_dict[real_class_name] = current_entry
      current_entry = []
      continue

    if not reading_class_header and line_tokens:
      is_function = _IsLineFunctionDefinition(line)
      member = _GetMemberIdentifier(line_tokens, mapping_dict,
                                    renamed_class_name, is_function)
      current_entry.append(member)

  return dex_dict
| 211 | |
| 212 | |
| 213 def _DiffDexDicts(dex_base, dex_new): | |
| 214 diffs = [] | |
| 215 for key, base_class_members in dex_base.iteritems(): | |
| 216 if key in dex_new: | |
| 217 # Class in both | |
| 218 base_class_members_set = set(base_class_members) | |
| 219 # Removing from dex_new to have just those which only appear in dex_new | |
| 220 # left over. | |
| 221 new_class_members_set = set(dex_new.pop(key)) | |
| 222 if base_class_members_set == new_class_members_set: | |
| 223 continue | |
| 224 else: | |
| 225 # They are not equal | |
| 226 diff_string = key | |
| 227 for diff in base_class_members_set.difference(new_class_members_set): | |
| 228 # Base has stuff the new one doesn't | |
| 229 diff_string += '\n' + '- ' + diff | |
| 230 for diff in new_class_members_set.difference(base_class_members_set): | |
| 231 # New has stuff the base one doesn't | |
| 232 diff_string += '\n' + '+ ' + diff | |
| 233 diffs.append(diff_string) | |
| 234 else: | |
| 235 # Class not found in new | |
| 236 diff_string = '-class ' + key | |
| 237 diffs.append(diff_string) | |
| 238 if dex_new: | |
| 239 # Classes in new that have yet to be hit by base | |
| 240 for key in dex_new: | |
| 241 diff_string = '+class ' + key | |
| 242 diffs.append(diff_string) | |
| 243 | |
| 244 return diffs | |
| 245 | |
| 246 | |
def main():
  """Diffs two dextra dumps named on the command line and prints the result.

  Reads four positional arguments: the mapping file and dextra output of the
  base dex, followed by the mapping file and dextra output of the new dex.
  Every entry returned by _DiffDexDicts() is printed on stdout.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('base_mapping_file',
                      help='Mapping file from proguard output for base dex')
  parser.add_argument('base_dextra_output',
                      help='dextra -j -f -m output for base dex')
  parser.add_argument('new_mapping_file',
                      help='Mapping file from proguard output for new dex')
  parser.add_argument('new_dextra_output',
                      help='dextra -j -f -m output for new dex')
  args = parser.parse_args()

  # Reading is open()'s default mode, so no mode argument is passed.
  with open(args.base_mapping_file) as f:
    mapping_base = _ReadMappingDict(f)
  with open(args.base_dextra_output) as f:
    dex_base = _BuildMappedDexDict(f, mapping_base)
  with open(args.new_mapping_file) as f:
    mapping_new = _ReadMappingDict(f)
  with open(args.new_dextra_output) as f:
    dex_new = _BuildMappedDexDict(f, mapping_new)

  for diff in _DiffDexDicts(dex_base, dex_new):
    # A parenthesized single argument prints the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    print(diff)
| 272 | |
| 273 | |
# Run the tool only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()
| 276 | |
| OLD | NEW |