tools/android/dexdiffer/dexdiffer.py - Issue 2057323002: Dexdiffer scripts

Unified Diff: tools/android/dexdiffer/dexdiffer.py

Issue 2057323002: Dexdiffer scripts (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/android/dexdiffer/dexdiffer.py

diff --git a/tools/android/dexdiffer/dexdiffer.py b/tools/android/dexdiffer/dexdiffer.py

new file mode 100644

index 0000000000000000000000000000000000000000..f990910f773c19a262fd631aa04ab0607e1b44de

--- /dev/null

+++ b/tools/android/dexdiffer/dexdiffer.py

@@ -0,0 +1,277 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Tool to diff 2 dex files that have been proguarded.

+To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html

+Then use the dextra binary on a classes.dex file like so:

+ dextra_binary -j -f -m classes.dex > output.dextra

+Do this for both the dex files you want to compare. Then, take the appropriate

+proguard mapping files used to generate those dex files, and use this script:

+ python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra

+"""

+import re

+import sys

agrieve 2016/06/13 16:57:01 Two blank lines between top-level functions

smaier 2016/06/13 18:57:16 Done.

def is_new_class(line):
  """Returns True if |line| is a class line of a mapping file.

  Class lines are recognised purely by their trailing ':'.
  """
  return line[-1:] == ':'

def parse_mapping_line(line):
  """Splits one mapping line into its original and renamed identifiers.

  Expects lines like one of these 3:
    'android.support.v8.MenuPopupHelper -> android.support.v8.v:'
    '    android.view.LayoutInflater mInflater -> d'
    '    117:118:void setForceShowIcon(boolean) -> b'
  Those three examples would return
    'android.support.v8.MenuPopupHelper', 'android.support.v8.v'
    'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'
    'void setForceShowIcon(boolean)', 'void b(boolean)'

  Args:
    line: One line of a proguard mapping file. Surrounding whitespace and a
        trailing ':' are ignored.

  Returns:
    A tuple (original_name, renamed_identifier). The renamed identifier is the
    new name with the original type prefix and argument list re-attached.

  Raises:
    ValueError: |line| contains no ' -> ' separator.
  """
  # Drop indentation and the ':' that terminates class lines.
  line = line.strip().rstrip(':')
  # Strip the leading 'start:end:' line range.
  line = re.sub(r'^\d+:\d+:', '', line)
  # Strip every ':N' that trails the argument list, so that both '):12' and
  # '):12:13' collapse to ')'.
  line = re.sub(r'\)(?::\d+)+', ')', line)

  original_name, separator, new_name = line.partition(' -> ')
  if not separator:
    raise ValueError('Not a mapping line (missing " -> "): %r' % line)

  # Everything up to and including the first space is the member's type.
  type_string = ''
  first_space = original_name.find(' ')
  if first_space >= 0:
    type_string = original_name[:first_space + 1]

  # Only copy an argument list when both brackets are really present;
  # str.find() signals "missing" with -1, which is truthy.
  arguments_string = ''
  open_paren = original_name.find('(')
  close_paren = original_name.find(')')
  if 0 <= open_paren < close_paren:
    arguments_string = original_name[open_paren:close_paren + 1]

  return original_name, type_string + new_name + arguments_string

def read_mapping_dict(mappingFile):
  """Builds a lookup table from the lines of a proguard mapping file.

  Args:
    mappingFile: Iterable of mapping-file lines (e.g. an open file).

  Returns:
    Dict keyed by renamed class name. Each value is a two-item list
    [original_class_name, member_mappings], where member_mappings maps a
    renamed member identifier to its original identifier. An input with no
    class lines yields an empty dict.

  Raises:
    ValueError: A member line appears before the first class line.
  """
  mapping = {}
  member_mappings = None
  for line in mappingFile:
    line = line.strip()
    # Blank lines and '#' lines hold no mapping; skip them instead of letting
    # the line parser fail on them.
    if not line or line.startswith('#'):
      continue
    if is_new_class(line):
      original_class_name, renamed_class_name = parse_mapping_line(line)
      member_mappings = {}
      mapping[renamed_class_name] = [original_class_name, member_mappings]
    else:
      if member_mappings is None:
        raise ValueError('Member mapping found before any class: %r' % line)
      original_member_name, renamed_member_name = parse_mapping_line(line)
      member_mappings[renamed_member_name] = original_member_name
  return mapping

def strip_comments(string):
  """Returns |string| with /* block */ and // line comments removed.

  Block comments may span several lines. Line comments run to the end of
  their own line, and this holds for every line of a multi-line |string|.
  Comment markers are not recognised as being inside string literals, so
  they are stripped there as well.
  """
  # Remove all occurrences of multiline comments (/*COMMENT*/).
  string = re.sub(r'/\*.*?\*/', '', string, flags=re.DOTALL)
  # Remove all occurrences of single line comments (//COMMENT). '.' never
  # matches a newline here, so each match stops at the end of its line.
  string = re.sub(r'//.*', '', string)
  return string

def strip_quotes(string):
  """Returns |string| with quoted spans removed, delimiters included.

  Double-quoted spans are removed first, then single-quoted spans. Quotes are
  paired strictly left to right (escapes are not interpreted), and a quote
  without a partner is left in place.
  """
  for quote in ('"', '\''):
    quoted_span = '%s[^%s]*%s' % (quote, quote, quote)
    string = re.sub(quoted_span, '', string)
  return string

# Java keywords that may precede a declaration and are not part of the
# member's type or name.
_QUALIFIERS = frozenset([
    'public', 'protected', 'private', 'final', 'static', 'abstract',
    'volatile', 'native', 'enum', 'synchronized', 'transient', 'strictfp',
])


def remove_qualifiers(string_tokens):
  """Drops the leading run of qualifier keywords from |string_tokens|.

  Args:
    string_tokens: List of string tokens for one line.

  Returns:
    The tokens from the first non-qualifier onwards. The input list itself is
    returned when nothing needs to be dropped.
  """
  if not string_tokens:
    return string_tokens
  skip = 0
  while skip < len(string_tokens) and string_tokens[skip] in _QUALIFIERS:
    skip += 1
  # Slice once rather than once per removed qualifier.
  return string_tokens[skip:] if skip else string_tokens

def get_line_tokens(line):
  """Tokenizes one line and drops its leading qualifiers.

  Comments are removed first. Every maximal run of word characters
  (alphanumerics and '_') plus '$', '.', '<', '>', '{', '}', '[' and ']'
  then becomes one token.
  """
  uncommented = strip_comments(line)
  return remove_qualifiers(re.findall(r'[\w\$\.<>\{\}\[\]]+', uncommented))

def is_class_definition(line_tokens):
  """Tells whether the first token is 'class'.

  An empty |line_tokens| is returned as-is, which is falsy.
  """
  if not line_tokens:
    return line_tokens
  return line_tokens[0] == 'class'

def is_end_of_class_definition(line_tokens):
  """Tells whether the last token is '{'.

  An empty |line_tokens| is returned as-is, which is falsy.
  """
  if not line_tokens:
    return line_tokens
  return line_tokens[-1] == '{'

def is_end_of_class(line_tokens):
  """Tells whether the last token is '}'.

  An empty |line_tokens| is returned as-is, which is falsy.
  """
  if not line_tokens:
    return line_tokens
  return line_tokens[-1] == '}'

def type_lookup(renamed_type, mapping_dict):
  """Translates a possibly-renamed type name back to its original name.

  Square brackets at either end of |renamed_type| are set aside for the
  lookup and appended after the resolved name. A name with no entry in
  |mapping_dict| resolves to itself.

  Args:
    renamed_type: Type name, optionally with '[' / ']' at its ends.
    mapping_dict: Dict whose values hold the original name at index 0.
  """
  bare_name = renamed_type.strip('[]')
  brackets = renamed_type.replace(bare_name, '')
  if bare_name in mapping_dict:
    resolved = mapping_dict[bare_name][0]
  else:
    resolved = bare_name
  return resolved + brackets

def get_member_identifier(line_tokens, mapping_dict, renamed_class_name,
                          is_function):
  """Returns the original identifier of the member that |line_tokens| declares.

  Args:
    line_tokens: Tokens of a member declaration: the type, the renamed name,
        then (for functions) one token per argument type.
    mapping_dict: Dict of renamed class name -> [original class name,
        {renamed member identifier: original member identifier}].
    renamed_class_name: Renamed name of the class that declares the member.
    is_function: Whether the declaration is a function, in which case an
        argument list is appended to the lookup key.

  Returns:
    The real type + real identifier + any real function args (if applicable),
    as stored in the member mappings of |renamed_class_name|.

  Raises:
    KeyError: The rebuilt identifier is not in the class's member mappings.
        Diagnostics are printed before the error is raised.
  """
  assert len(line_tokens) > 1
  assert renamed_class_name in mapping_dict
  mapping_entry = mapping_dict[renamed_class_name][1]

  real_type = type_lookup(line_tokens[0], mapping_dict)
  # Anything from the first '=' onwards is not part of the name.
  renamed_name_token = line_tokens[1].partition('=')[0]

  function_args = ''
  if is_function:
    function_args = '(%s)' % ','.join(
        type_lookup(token, mapping_dict) for token in line_tokens[2:])

  renamed_member_identifier = (real_type + ' ' + renamed_name_token
                               + function_args)
  if renamed_member_identifier not in mapping_entry:
    # Single-argument print() produces the same output on Python 2 and 3.
    print('Proguarded class which caused the issue: %s'
          % (renamed_class_name,))
    print('Key supposed to be in this dict: %s' % (mapping_entry,))
    print('Definition line tokens: %s' % (line_tokens,))

  return mapping_entry[renamed_member_identifier]

def get_class_names(line_tokens, mapping_dict):
  """Returns (renamed_name, original_name) for a class definition line.

  The renamed name is the second token; it must be a key of |mapping_dict|,
  whose value holds the original name at index 0.
  """
  assert len(line_tokens) > 1
  renamed_name = line_tokens[1]
  assert renamed_name in mapping_dict
  original_name = mapping_dict[renamed_name][0]
  return renamed_name, original_name

def is_line_function_definition(line):
  """Tells whether |line| has both '(' and ')' outside comments and quotes.

  Comments and quoted spans are removed before the check. A bracket whose
  first occurrence is the very first character does not count.
  """
  code_only = strip_quotes(strip_comments(line))
  return min(code_only.find('('), code_only.find(')')) > 0

def build_mapped_dex_dict(dextra_file, mapping_dict):
  """Builds a dict of original class name -> list of original members.

  Expects data from dextra -j -m -f.

  Args:
    dextra_file: Iterable of lines of dextra output.
    mapping_dict: Dict of renamed class name -> [original class name, member
        mappings]. A 'bool' -> 'boolean' entry is added to it in place.

  Returns:
    Dict mapping each original class name to the list of its members'
    original identifiers.
  """
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some
  # reason dextra shortens boolean to bool.
  mapping_dict['bool'] = ['boolean', {}]

  dex_dict = {}
  members = []
  in_class_header = True
  inside_multiline_string = False
  for line in dextra_file:
    # Accounting for multi line strings: a line with an odd number of '"'
    # opens or closes one, and everything in between is skipped.
    if line.count('"') % 2 != 0:
      inside_multiline_string = not inside_multiline_string
      continue
    if inside_multiline_string:
      continue

    tokens = get_line_tokens(line)
    if is_class_definition(tokens):
      in_class_header = True
      renamed_class_name, real_class_name = get_class_names(tokens,
                                                            mapping_dict)

    if is_end_of_class_definition(tokens):
      # The line ending in '{' closes the class header.
      in_class_header = False
    elif is_end_of_class(tokens):
      dex_dict[real_class_name] = members
      members = []
    elif tokens and not in_class_header:
      is_function = is_line_function_definition(line)
      members.append(get_member_identifier(tokens, mapping_dict,
                                           renamed_class_name, is_function))
  return dex_dict

def diff_dex_dicts(dex_base, dex_new):
  """Describes the differences between two class -> members dicts.

  Neither argument is modified.

  Args:
    dex_base: Dict mapping class name to a list of member strings.
    dex_new: Dict of the same shape to compare against |dex_base|.

  Returns:
    List of strings, one per differing class:
      '-class NAME' for a class only in |dex_base|,
      '+class NAME' for a class only in |dex_new|,
      'NAME' followed by '\n- MEMBER' / '\n+ MEMBER' lines for a class in
      both whose member sets differ ('-' members first, each group sorted).
    Classes whose member sets are equal produce no entry.
  """
  diffs = []
  # items() works on both Python 2 and Python 3, unlike iteritems().
  for key, base_class_members in dex_base.items():
    if key not in dex_new:
      # Class not found in new
      diffs.append('-class ' + key)
      continue

    base_class_members_set = set(base_class_members)
    new_class_members_set = set(dex_new[key])
    if base_class_members_set == new_class_members_set:
      continue

    only_in_base = base_class_members_set - new_class_members_set
    only_in_new = new_class_members_set - base_class_members_set
    # Sets have no defined order; sort so the report is reproducible.
    lines = [key]
    lines.extend('- ' + member for member in sorted(only_in_base))
    lines.extend('+ ' + member for member in sorted(only_in_new))
    diffs.append('\n'.join(lines))

  # Classes in new that base does not have.
  diffs.extend('+class ' + key for key in dex_new if key not in dex_base)
  return diffs

def main(unused_argv):
  """Diffs two dextra dumps and prints each difference to stdout.

  Args:
    unused_argv: Command-line arguments without the program name, in the
        order: base mapping file, base dextra output, new mapping file, new
        dextra output. (Despite its name, the list is used.)
  """
  # Local import: argparse is not among the file-level imports.
  import argparse

  parser = argparse.ArgumentParser(
      description='Diff 2 dex files that have been proguarded, using their '
                  'dextra output and proguard mapping files.')
  parser.add_argument('mapping_base',
                      help='Proguard mapping file of the base dex.')
  parser.add_argument('dextra_base',
                      help='dextra -j -f -m output of the base dex.')
  parser.add_argument('mapping_new',
                      help='Proguard mapping file of the new dex.')
  parser.add_argument('dextra_new',
                      help='dextra -j -f -m output of the new dex.')
  # On a wrong argument count this prints the usage text and exits.
  args = parser.parse_args(unused_argv)

  with open(args.mapping_base, 'r') as f:
    mapping_base = read_mapping_dict(f)
  with open(args.dextra_base, 'r') as f:
    dex_base = build_mapped_dex_dict(f, mapping_base)
  with open(args.mapping_new, 'r') as f:
    mapping_new = read_mapping_dict(f)
  with open(args.dextra_new, 'r') as f:
    dex_new = build_mapped_dex_dict(f, mapping_new)

  for diff in diff_dex_dicts(dex_base, dex_new):
    # Single-argument print() behaves the same on Python 2 and 3.
    print(diff)

# Script entry point: pass everything after the program name to main().
if __name__ == '__main__':
  cli_args = sys.argv[1:]
  main(cli_args)

« no previous file with comments | « tools/android/dexdiffer/OWNERS ('k') | tools/android/dexdiffer/dexdiffer_unittest.py » ('j') | no next file with comments »