tools/android/dexdiffer/dexdiffer.py - Issue 2057323002: Dexdiffer scripts

Side by Side Diff: tools/android/dexdiffer/dexdiffer.py

Issue 2057323002: Dexdiffer scripts (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4 """Tool to diff 2 dex files that have been proguarded.

	5

	6 To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html

	7 Then use the dextra binary on a classes.dex file like so:

	8 dextra_binary -j -f -m classes.dex > output.dextra

	9 Do this for both the dex files you want to compare. Then, take the appropriate

	10 proguard mapping files used to generate those dex files, and use this script:

	11 python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra

	12 """

	13

	14 import re

	15 import sys

	16
	agrieve 2016/06/13 16:57:01 Two blank lines between top-level functions Two blank lines between top-level functions smaier 2016/06/13 18:57:16 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > Two blank lines between top-level functions Done.
	17 def is_new_class(line):
	agrieve 2016/06/13 16:57:01 naming convention is def _IsNewClass() naming convention is def _IsNewClass() smaier 2016/06/13 18:57:15 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > naming convention is def _IsNewClass() Done.
	18 return line.endswith(':')

	19

	20 # Expects lines like one of these 3:

	21 # 'android.support.v8.MenuPopupHelper -> android.support.v8.v:'

	22 # ' android.view.LayoutInflater mInflater -> d'

	23 # ' 117:118:void setForceShowIcon(boolean) -> b'

	24 # Those three examples would return

	25 # 'android.support.v8.MenuPopupHelper', 'android.support.v8.v'

	26 # 'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'

	27 # 'void setForceShowIcon(boolean)', 'void b(boolean)'

	28 def parse_mapping_line(line):

	29 if line.endswith(':'):
	agrieve 2016/06/13 16:57:01 nit: line.rstrip(':') nit: line.rstrip(':') smaier 2016/06/13 18:57:15 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > nit: line.rstrip(':') Done.
	30 # Removing any trailing colons

	31 line = line[:-1]

	32

	33 # Stripping any line number denotations

	34 line = re.sub(r'\d+:\d+:', '', line)

	35 line = re.sub(r'\):\d+', ')', line)

	36

	37 split_string = line.split(' -> ')

	38 original_name, new_name = split_string[0], split_string[1]
	agrieve 2016/06/13 16:57:01 original_name, new_name = line.split(' -> ') original_name, new_name = line.split(' -> ') smaier 2016/06/13 18:57:15 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > original_name, new_name = line.split(' -> ') Done.
	39

	40 type_string = ''

	41 if original_name.find(' ') >= 0:
	agrieve 2016/06/13 16:57:01 if ' ' in original_name, or cache result in a vari if ' ' in original_name, or cache result in a variable to not have to call find() again within the condition smaier 2016/06/13 18:57:16 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > if ' ' in original_name, or cache result in a variable to not have to call > find() again within the condition Done.
	42 type_string = original_name[:original_name.find(' ') + 1]

	43

	44 arguments_string = ''

	45 if original_name.find('(') and original_name.find(')'):
	agrieve 2016/06/13 16:57:01 this evaluates to true when no brackets exist. Mig this evaluates to true when no brackets exist. Might make sense to use re.search for these: match = re.search(r'$(.?)$', original_name) if match: arguments_string = match.groups(1) smaier* 2016/06/13 18:57:17 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > this evaluates to true when no brackets exist. Might make sense to use re.search > for these: > > match = re.search(r'$(.*?)$', original_name) > if match: > arguments_string = match.groups(1) Done.
	46 arguments_string = original_name[

	47 original_name.find('('):original_name.find(')') + 1]

	48

	49 return original_name, type_string + new_name + arguments_string

	50

	51 def read_mapping_dict(mappingFile):
	agrieve 2016/06/13 16:57:01 mappingFile->mapping_file mappingFile->mapping_file smaier 2016/06/13 18:57:16 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > mappingFile->mapping_file Done.
	52 mapping = {}

	53 renamed_class_name = ''

	54 original_class_name = ''

	55 for line in mappingFile:

	56 line = line.strip()

	57 if is_new_class(line):

	58 if renamed_class_name:

	59 mapping[renamed_class_name] = current_entry

	60

	61 member_mappings = {}

	62 original_class_name, renamed_class_name = parse_mapping_line(line)

	63 current_entry = [original_class_name, member_mappings]

	64 else:

	65 original_member_name, renamed_member_name = parse_mapping_line(line)

	66 member_mappings[renamed_member_name] = original_member_name

	67

	68 mapping[renamed_class_name] = current_entry

	69 return mapping

	70

	71 def strip_comments(string):

	72 # Remove all occurances of multiline comments (/COMMENT/)

	73 string = re.sub(r'/\.?\*/', "", string, flags=re.DOTALL)

	74 # Remove all occurances of single line comments (//COMMENT)

	75 string = re.sub(r'//.*?$', "", string)

	76 return string

	77

	78

	79 def strip_quotes(string):
	agrieve 2016/06/13 16:57:00 nit: might be more readable to use re here: retu nit: might be more readable to use re here: return re.sub(r'([\'"]).?\1', '', string) smaier* 2016/06/13 18:57:17 Done. Show quoted text On 2016/06/13 16:57:00, agrieve wrote: > nit: might be more readable to use re here: > return re.sub(r'([\'"]).*?\1', '', string) Done.
	80 i = string.find('"')

	81 j = string.find('"', i+1)

	82 while j != -1:

	83 string = string[:i] + string[j+1:]

	84 i = string.find('"')

	85 j = string.find('"', i+1)

	86 i = string.find('\'')

	87 j = string.find('\'', i+1)

	88 while j != -1:

	89 string = string[:i] + string[j+1:]

	90 i = string.find('\'')

	91 j = string.find('\'', i+1)

	92 return string

	93

	94

	95 def remove_qualifiers(string_tokens):

	96 qualifiers = ['public', 'protected', 'private', 'final', 'static', 'abstract',
	agrieve 2016/06/13 16:57:01 nit: make this a set and put it at the top of the nit: make this a set and put it at the top of the file: _QUALIFIERS = set([...]) smaier 2016/06/13 18:57:15 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > nit: make this a set and put it at the top of the file: > > _QUALIFIERS = set([...]) Done.
	97 'volatile', 'native', 'enum']

	98 while string_tokens and string_tokens[0] in qualifiers:

	99 string_tokens = string_tokens[1:]

	100 return string_tokens

	101

	102 def get_line_tokens(line):

	103 line = strip_comments(line)

	104 # Match all alphanumeric + underscore with \w then cases for:

	105 # '$', '<', '>', '{', '}', '[', ']', and '.'

	106 tokens = re.findall(r'[\w\$\.<>\{\}\[\]]+', line)

	107 return remove_qualifiers(tokens)

	108

	109

	110 def is_class_definition(line_tokens):

	111 return line_tokens and line_tokens[0] == 'class'

	112

	113

	114 def is_end_of_class_definition(line_tokens):

	115 return line_tokens and line_tokens[-1] == '{'

	116

	117

	118 def is_end_of_class(line_tokens):

	119 return line_tokens and line_tokens[-1] == '}'

	120

	121

	122 def type_lookup(renamed_type, mapping_dict):

	123 renamed_type_stripped = renamed_type.strip('[]')

	124 postfix = renamed_type.replace(renamed_type_stripped, '')
	agrieve 2016/06/13 16:57:01 postfix -> suffix postfix -> suffix smaier 2016/06/13 18:57:16 Typically modifiers/operators after a token are re Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > postfix -> suffix Typically modifiers/operators after a token are referred to as "postfix"
	125

	126 if renamed_type_stripped in mapping_dict:

	127 real_type = mapping_dict[renamed_type_stripped][0]

	128 else:

	129 real_type = renamed_type_stripped

	130

	131 return real_type + postfix

	132

	133

	134 def get_member_identifier(line_tokens, mapping_dict, renamed_class_name,

	135 is_function):

	136 assert len(line_tokens) > 1

	137 assert renamed_class_name in mapping_dict

	138 mapping_entry = mapping_dict[renamed_class_name][1]

	139

	140 renamed_type = line_tokens[0]

	141 real_type = type_lookup(renamed_type, mapping_dict)

	142

	143 renamed_name_token = line_tokens[1]

	144 renamed_name_token, _, _ = renamed_name_token.partition('=')

	145

	146 function_args = ''

	147 if is_function:

	148 function_args += '('

	149 for token in line_tokens[2:]:

	150 function_args += type_lookup(token, mapping_dict) + ','

	151 # Remove trailing ','

	152 if function_args.endswith(','):
	agrieve 2016/06/13 16:57:01 rstrip() rstrip() smaier 2016/06/13 18:57:15 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > rstrip() Done.
	153 function_args = function_args[:-1]

	154 function_args += ')'

	155

	156 renamed_member_identifier = (real_type + ' ' + renamed_name_token

	157 + function_args)

	158 if renamed_member_identifier not in mapping_entry:

	159 print 'Proguarded class which caused the issue:', renamed_class_name

	160 print 'Key supposed to be in this dict:', mapping_entry

	161 print 'Definition line tokens:', line_tokens

	162

	163 # This will be the real type + real_identifier + any real function args (if

	164 # applicable)

	165 return mapping_entry[renamed_member_identifier]

	166

	167 def get_class_names(line_tokens, mapping_dict):

	168 assert len(line_tokens) > 1

	169 assert line_tokens[1] in mapping_dict

	170 return line_tokens[1], mapping_dict[line_tokens[1]][0]

	171

	172

	173 def is_line_function_definition(line):

	174 line = strip_comments(line)

	175 line = strip_quotes(line)

	176 return line.find('(') > 0 and line.find(')') > 0

	177

	178 # Expects data from dextra -j -m -f

	179 # Returns dictionary mapping class name to list of members

def build_mapped_dex_dict(dextra_file, mapping_dict):
  """Collect the members of every class in a dextra dump, by original name.

  Args:
    dextra_file: Iterable of text lines (expected to come from
        `dextra -j -m -f`).
    mapping_dict: Dict keyed by renamed class name whose values are
        [original class name, member mapping dict]. NOTE: this dict is
        modified in place; a 'bool' entry is added to it.

  Returns:
    Dict mapping each original class name to the list of member identifiers
    returned by get_member_identifier() for the lines of that class.
  """
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some reason
  # dextra shortens boolean to bool.
  mapping_dict['bool'] = ['boolean', {}]
  dex_dict = {}
  # Members gathered for the class that is currently being read.
  current_entry = []
  # True until a line ending in '{' has been seen for the current class.
  reading_class_header = True
  # True while inside a string literal that spans several lines.
  unmatched_string = False

  for line in dextra_file:
    # Accounting for multi line strings
    # A line with an odd number of '"' opens or closes such a string; that
    # line and every line in between are skipped.
    if line.count('"') % 2:
      unmatched_string = not unmatched_string
      continue
    if unmatched_string:
      continue

    line_tokens = get_line_tokens(line)
    if is_class_definition(line_tokens):
      reading_class_header = True
      # These two names stay unbound until the first class line is seen.
      renamed_class_name, real_class_name = get_class_names(line_tokens,
                                                            mapping_dict)
    # Checked on every line (not only class lines); any line whose last
    # token is '{' ends the header and is not treated as a member.
    if is_end_of_class_definition(line_tokens):
      reading_class_header = False
      continue
    # A line whose last token is '}' stores the collected members and starts
    # a fresh list for the next class.
    if is_end_of_class(line_tokens):
      dex_dict[real_class_name] = current_entry
      current_entry = []
      continue

    # Every remaining non-empty line after the header is taken as a member.
    if not reading_class_header and line_tokens:
      is_function = is_line_function_definition(line)
      member = get_member_identifier(line_tokens, mapping_dict,
                                     renamed_class_name, is_function)
      current_entry.append(member)

  return dex_dict

	217

	218

	219 def diff_dex_dicts(dex_base, dex_new):

	220 diffs = []

	221 for key, base_class_members in dex_base.iteritems():

	222 if key in dex_new:

	223 # Class in both

	224 base_class_members_set = set(base_class_members)

	225 # Removing from dex_new to have just those which only appear in dex_new

	226 # left over.

	227 new_class_members_set = set(dex_new.pop(key))

	228 if (base_class_members_set.issubset(new_class_members_set)
	agrieve 2016/06/13 16:57:01 could you just say: base_class_members_set == new_ could you just say: base_class_members_set == new_class_members_set smaier 2016/06/13 18:57:15 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > could you just say: base_class_members_set == new_class_members_set Done.
	229 and new_class_members_set.issubset(base_class_members_set)):

	230 # They are equal

	231 continue

	232 else:

	233 # They are not equal

	234 diff_string = key

	235 if base_class_members_set.difference(new_class_members_set):
	agrieve 2016/06/13 16:57:01 nit: store in a variable. nit: store in a variable. smaier 2016/06/13 18:57:16 Done. Show quoted text On 2016/06/13 16:57:01, agrieve wrote: > nit: store in a variable. Done.
	236 # Base has stuff the new one doesn't

	237 for diff in base_class_members_set.difference(new_class_members_set):

	238 diff_string += '\n' + '- ' + diff

	239 if new_class_members_set.difference(base_class_members_set):

	240 # New has stuff the base one doesn't

	241 for diff in new_class_members_set.difference(base_class_members_set):

	242 diff_string += '\n' + '+ ' + diff

	243 diffs.append(diff_string)

	244 else:

	245 # Class not found in new

	246 diff_string = '-class ' + key

	247 diffs.append(diff_string)

	248 if dex_new:

	249 # Classes in new that have yet to be hit by base

	250 for key in dex_new:

	251 diff_string = '+class ' + key

	252 diffs.append(diff_string)

	253

	254 return diffs

	255

	256

	257 def main(unused_argv):

	258 assert(len(unused_argv) == 4)
	agrieve 2016/06/13 16:57:00 this doesn't seem unused :P this doesn't seem unused :P smaier 2016/06/13 18:57:16 Done. Show quoted text On 2016/06/13 16:57:00, agrieve wrote: > this doesn't seem unused :P Done.
	259

	260 with open(unused_argv[0], "r") as f:
	agrieve 2016/06/13 16:57:00 You should at least have a usage text and support You should at least have a usage text and support --help. Easiest way is to use argparse module. smaier 2016/06/13 18:57:16 Done. Show quoted text On 2016/06/13 16:57:00, agrieve wrote: > You should at least have a usage text and support --help. > > Easiest way is to use argparse module. Done.
	261 mapping_base = read_mapping_dict(f)

	262 with open(unused_argv[1], "r") as f:

	263 dex_base = build_mapped_dex_dict(f, mapping_base)

	264 with open(unused_argv[2], "r") as f:

	265 mapping_new = read_mapping_dict(f)

	266 with open(unused_argv[3], "r") as f:

	267 dex_new = build_mapped_dex_dict(f, mapping_new)

	268

	269 diffs = diff_dex_dicts(dex_base, dex_new)

	270 if diffs:

	271 for diff in diffs:

	272 print diff

	273

	274

	275 if __name__ == '__main__':

	276 main(sys.argv[1:])

	277

OLD	NEW

« no previous file with comments | « tools/android/dexdiffer/OWNERS ('k') | tools/android/dexdiffer/dexdiffer_unittest.py » ('j') | no next file with comments »