tools/android/dexdiffer/dexdiffer.py - Issue 2057323002: Dexdiffer scripts

Unified Diff: tools/android/dexdiffer/dexdiffer.py

Issue 2057323002: Dexdiffer scripts (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Addressing Andrew's suggestions Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: tools/android/dexdiffer/dexdiffer.py

diff --git a/tools/android/dexdiffer/dexdiffer.py b/tools/android/dexdiffer/dexdiffer.py

new file mode 100644

index 0000000000000000000000000000000000000000..41f42b7c78cd75a7afcce4febd1a231544badf37

--- /dev/null

+++ b/tools/android/dexdiffer/dexdiffer.py

@@ -0,0 +1,276 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Tool to diff 2 dex files that have been proguarded.

+To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html

+Then use the dextra binary on a classes.dex file like so:

+ dextra_binary -j -f -m classes.dex > output.dextra

+Do this for both the dex files you want to compare. Then, take the appropriate

+proguard mapping files used to generate those dex files, and use this script:

+ python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra

+"""

+import argparse

+import re

+import sys

+_QUALIFIERS = set(['public', 'protected', 'private', 'final', 'static',

+ 'abstract', 'volatile', 'native', 'enum'])

+def _IsNewClass(line):

+ return line.endswith(':')

+# Expects lines like one of these 3:

+# 'android.support.v8.MenuPopupHelper -> android.support.v8.v:'

+# ' android.view.LayoutInflater mInflater -> d'

+# ' 117:118:void setForceShowIcon(boolean) -> b'

+# Those three examples would return

+# 'android.support.v8.MenuPopupHelper', 'android.support.v8.v'

+# 'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'

+# 'void setForceShowIcon(boolean)', 'void b(boolean)'

+def _ParseMappingLine(line):

+ line = line.rstrip(':')

+ # Stripping any line number denotations

+ line = re.sub(r'\d+:\d+:', '', line)

+ line = re.sub(r'\):\d+', ')', line)

+ original_name, new_name = line.split(' -> ')

+ type_string = ''

+ if ' ' in original_name:

+ type_string = original_name[:original_name.find(' ') + 1]

+ arguments_string = ''

+ match = re.search(r'($.*?$)', original_name)

+ if match:

+ arguments_string = match.group(1)

+ return original_name, type_string + new_name + arguments_string

def _ReadMappingDict(mapping_file):
  """Parses a proguard mapping file into a lookup keyed by renamed class name.

  Args:
    mapping_file: An iterable of mapping-file lines, e.g. an open file.

  Returns:
    A dict mapping each renamed class name to a two-element list
    [original_class_name, member_mappings], where member_mappings maps each
    renamed member identifier to its original identifier. An input without
    any class lines yields an empty dict.

  Raises:
    ValueError: if a member line appears before the first class line, or a
        line cannot be parsed by _ParseMappingLine.
  """
  mapping = {}
  # Members of the class currently being read; None until a class is seen.
  member_mappings = None
  for line in mapping_file:
    line = line.strip()
    # Blank lines and '#' comment lines carry no mapping information.
    if not line or line.startswith('#'):
      continue
    if _IsNewClass(line):
      original_class_name, renamed_class_name = _ParseMappingLine(line)
      member_mappings = {}
      # Register the entry right away; members are added to the same dict
      # object as the following lines are read.
      mapping[renamed_class_name] = [original_class_name, member_mappings]
    else:
      if member_mappings is None:
        raise ValueError('Member mapping found before any class: ' + line)
      original_member_name, renamed_member_name = _ParseMappingLine(line)
      member_mappings[renamed_member_name] = original_member_name
  return mapping

+def _StripComments(string):

+ # Remove all occurances of multiline comments (/*COMMENT*/)

+ string = re.sub(r'/\*.*?\*/', "", string, flags=re.DOTALL)

+ # Remove all occurances of single line comments (//COMMENT)

+ string = re.sub(r'//.*?$', "", string)

+ return string

+def _StripQuotes(string):

+ return re.sub(r'([\'"]).*?\1', '', string)

def _RemoveQualifiers(string_tokens):
  """Drops the leading run of _QUALIFIERS entries from |string_tokens|.

  Only qualifiers at the front are removed; later occurrences are kept. A
  falsy input, or one without leading qualifiers, is returned unchanged.
  """
  leading = 0
  for token in string_tokens or ():
    if token not in _QUALIFIERS:
      break
    leading += 1
  return string_tokens[leading:] if leading else string_tokens

def _GetLineTokens(line):
  """Tokenizes |line| after removing comments and leading qualifiers.

  Returns:
    A list of token strings, in order of appearance.
  """
  # \w matches alphanumerics and underscore; the character class additionally
  # admits '$', '<', '>', '{', '}', '[', ']', and '.'.
  token_pattern = re.compile(r'[\w\$\.<>\{\}\[\]]+')
  return _RemoveQualifiers(token_pattern.findall(_StripComments(line)))

+def _IsClassDefinition(line_tokens):

+ return line_tokens and line_tokens[0] == 'class'

+def _IsEndOfClass_definition(line_tokens):

+ return line_tokens and line_tokens[-1] == '{'

+def _IsEndOfClass(line_tokens):

+ return line_tokens and line_tokens[-1] == '}'

+def _TypeLookup(renamed_type, mapping_dict):

+ renamed_type_stripped = renamed_type.strip('[]')

+ postfix = renamed_type.replace(renamed_type_stripped, '')

+ if renamed_type_stripped in mapping_dict:

+ real_type = mapping_dict[renamed_type_stripped][0]

+ else:

+ real_type = renamed_type_stripped

+ return real_type + postfix

def _GetMemberIdentifier(line_tokens, mapping_dict, renamed_class_name,
                         is_function):
  """Looks up the original identifier of a renamed class member.

  Args:
    line_tokens: Tokens of the member line: type, name, then argument types.
    mapping_dict: Dict of renamed class name -> [original name, members].
    renamed_class_name: Renamed name of the class that owns the member.
    is_function: Whether an argument list should be built from the tokens
        following the name.

  Returns:
    The real type + real identifier + any real function args (if applicable),
    as stored in the class's member mapping.

  Raises:
    KeyError: if the rebuilt identifier is not in the member mapping; three
        diagnostic lines are printed first.
  """
  assert len(line_tokens) > 1
  assert renamed_class_name in mapping_dict
  member_lookup = mapping_dict[renamed_class_name][1]

  real_type = _TypeLookup(line_tokens[0], mapping_dict)
  # Drop an initializer glued to the name, e.g. 'a=5' -> 'a'.
  renamed_name = line_tokens[1].partition('=')[0]

  signature = ''
  if is_function:
    arg_types = [_TypeLookup(token, mapping_dict) for token in line_tokens[2:]]
    signature = '(' + ','.join(arg_types) + ')'

  identifier = real_type + ' ' + renamed_name + signature
  if identifier not in member_lookup:
    # Diagnostics only; the lookup below still raises KeyError.
    diagnostics = (
        ('Proguarded class which caused the issue:', renamed_class_name),
        ('Key supposed to be in this dict:', member_lookup),
        ('Definition line tokens:', line_tokens),
    )
    for label, value in diagnostics:
      print('%s %s' % (label, value))
  return member_lookup[identifier]

+def _GetClassNames(line_tokens, mapping_dict):

+ assert len(line_tokens) > 1

+ assert line_tokens[1] in mapping_dict

+ return line_tokens[1], mapping_dict[line_tokens[1]][0]

def _IsLineFunctionDefinition(line):
  """Tells whether |line| holds both '(' and ')' outside comments and quotes.

  Each parenthesis must first appear at an index greater than 0, so a line
  that starts with one does not count.
  """
  code = _StripQuotes(_StripComments(line))
  # str.find yields -1 when absent, so the smaller index decides both checks.
  return min(code.find('('), code.find(')')) > 0

def _BuildMappedDexDict(dextra_file, mapping_dict):
  """Builds a dict of original class name -> list of original members.

  Expects data from dextra -j -m -f.

  Args:
    dextra_file: Iterable of dextra output lines, e.g. an open file.
    mapping_dict: Dict of renamed class name -> [original name, members].
        A 'bool' entry is added to it as a side effect.

  Returns:
    Dictionary mapping class name to list of members.
  """
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some reason
  # dextra shortens boolean to bool.
  mapping_dict['bool'] = ['boolean', {}]

  dex_dict = {}
  members = []
  in_class_header = True
  inside_string = False
  for line in dextra_file:
    # Accounting for multi line strings: a line with an odd number of double
    # quotes opens or closes one, and everything in between is skipped.
    odd_quotes = line.count('"') % 2
    if odd_quotes:
      inside_string = not inside_string
    if odd_quotes or inside_string:
      continue

    tokens = _GetLineTokens(line)
    if _IsClassDefinition(tokens):
      in_class_header = True
      renamed_class, real_class = _GetClassNames(tokens, mapping_dict)

    # The header may end on the same line on which the class definition began,
    # hence a plain 'if' above and the chain starting here.
    if _IsEndOfClass_definition(tokens):
      in_class_header = False
    elif _IsEndOfClass(tokens):
      dex_dict[real_class] = members
      members = []
    elif tokens and not in_class_header:
      is_function = _IsLineFunctionDefinition(line)
      members.append(_GetMemberIdentifier(tokens, mapping_dict, renamed_class,
                                          is_function))
  return dex_dict

+def _DiffDexDicts(dex_base, dex_new):

+ diffs = []

+ for key, base_class_members in dex_base.iteritems():

+ if key in dex_new:

+ # Class in both

+ base_class_members_set = set(base_class_members)

+ # Removing from dex_new to have just those which only appear in dex_new

+ # left over.

+ new_class_members_set = set(dex_new.pop(key))

+ if base_class_members_set == new_class_members_set:

+ continue

+ else:

+ # They are not equal

+ diff_string = key

+ for diff in base_class_members_set.difference(new_class_members_set):

+ # Base has stuff the new one doesn't

+ diff_string += '\n' + '- ' + diff

+ for diff in new_class_members_set.difference(base_class_members_set):

+ # New has stuff the base one doesn't

+ diff_string += '\n' + '+ ' + diff

+ diffs.append(diff_string)

+ else:

+ # Class not found in new

+ diff_string = '-class ' + key

+ diffs.append(diff_string)

+ if dex_new:

+ # Classes in new that have yet to be hit by base

+ for key in dex_new:

+ diff_string = '+class ' + key

+ diffs.append(diff_string)

+ return diffs

def main():
  """Parses the command line, loads both dex descriptions, prints the diffs.

  Takes four positional arguments: the base mapping file, the base dextra
  output, the new mapping file and the new dextra output. Each diff entry is
  printed on stdout; nothing is printed when there are no differences.
  """
  parser = argparse.ArgumentParser()
  positional_arguments = (
      ('base_mapping_file', 'Mapping file from proguard output for base dex'),
      ('base_dextra_output', 'dextra -j -f -m output for base dex'),
      ('new_mapping_file', 'Mapping file from proguard output for new dex'),
      ('new_dextra_output', 'dextra -j -f -m output for new dex'),
  )
  for argument_name, argument_help in positional_arguments:
    parser.add_argument(argument_name, help=argument_help)
  args = parser.parse_args()

  # Each mapping must be read before the dextra output that depends on it.
  with open(args.base_mapping_file) as mapping_file:
    mapping_base = _ReadMappingDict(mapping_file)
  with open(args.base_dextra_output) as dextra_file:
    dex_base = _BuildMappedDexDict(dextra_file, mapping_base)
  with open(args.new_mapping_file) as mapping_file:
    mapping_new = _ReadMappingDict(mapping_file)
  with open(args.new_dextra_output) as dextra_file:
    dex_new = _BuildMappedDexDict(dextra_file, mapping_new)

  for diff in _DiffDexDicts(dex_base, dex_new):
    print(diff)


if __name__ == '__main__':
  main()

« no previous file with comments | « tools/android/dexdiffer/OWNERS ('k') | tools/android/dexdiffer/dexdiffer_unittest.py » ('j') | no next file with comments »