Chromium Code Reviews| Index: tools/android/dexdiffer/dexdiffer.py |
| diff --git a/tools/android/dexdiffer/dexdiffer.py b/tools/android/dexdiffer/dexdiffer.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..f990910f773c19a262fd631aa04ab0607e1b44de |
| --- /dev/null |
| +++ b/tools/android/dexdiffer/dexdiffer.py |
| @@ -0,0 +1,277 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| +"""Tool to diff 2 dex files that have been proguarded. |
| + |
| +To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html |
| +Then use the dextra binary on a classes.dex file like so: |
| + dextra_binary -j -f -m classes.dex > output.dextra |
| +Do this for both the dex files you want to compare. Then, take the appropriate |
| +proguard mapping files used to generate those dex files, and use this script: |
| + python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra |
| +""" |
| + |
| +import re |
| +import sys |
| + |
|
agrieve
2016/06/13 16:57:01
Two blank lines between top-level functions
smaier
2016/06/13 18:57:16
Done.
|
def is_new_class(line):
  """Tells whether a mapping-file line opens a new class section.

  The only thing inspected is the final character: a line counts as a class
  line when it ends with ':' (e.g. 'original.Name -> renamed.name:').

  Args:
    line: One line of a proguard mapping file, without trailing whitespace.

  Returns:
    True if the line ends with a colon, False otherwise (including for '').
  """
  return line[-1:] == ':'
| + |
def parse_mapping_line(line):
  """Splits one proguard mapping line into (original, renamed) identifiers.

  Expects lines like one of these 3:
    'android.support.v8.MenuPopupHelper -> android.support.v8.v:'
    '    android.view.LayoutInflater mInflater -> d'
    '    117:118:void setForceShowIcon(boolean) -> b'
  Those three examples would return
    'android.support.v8.MenuPopupHelper', 'android.support.v8.v'
    'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'
    'void setForceShowIcon(boolean)', 'void b(boolean)'

  Args:
    line: A single line of a proguard mapping file. Leading indentation and
        a trailing newline are tolerated.

  Returns:
    A (original_name, renamed_name) tuple. The renamed name is rebuilt with
    the original's type prefix and argument list so that it has the same
    shape as the original signature.

  Raises:
    ValueError: if the line does not contain the ' -> ' separator.
  """
  # Drop indentation/newline first: otherwise the leading space is mistaken
  # for the type/name separator and a trailing newline hides the colon.
  line = line.strip().rstrip(':')

  # Stripping any line number denotations
  line = re.sub(r'\d+:\d+:', '', line)
  line = re.sub(r'\):\d+', ')', line)

  pieces = line.split(' -> ')
  if len(pieces) < 2:
    raise ValueError('Not a mapping line (missing " -> "): %r' % line)
  original_name, new_name = pieces[0], pieces[1]

  # Everything up to and including the first space is the type prefix;
  # class lines have no space and therefore no prefix.
  type_end = original_name.find(' ')
  type_string = original_name[:type_end + 1] if type_end >= 0 else ''

  # find() returns -1 when absent, so compare explicitly instead of relying
  # on truthiness (-1 is truthy, 0 is falsy).
  arguments_string = ''
  open_paren = original_name.find('(')
  close_paren = original_name.find(')')
  if open_paren >= 0 and close_paren >= 0:
    arguments_string = original_name[open_paren:close_paren + 1]

  return original_name, type_string + new_name + arguments_string
| + |
def read_mapping_dict(mappingFile):
  """Builds a renamed-name lookup table from a proguard mapping file.

  Args:
    mappingFile: An iterable of mapping-file lines (e.g. an open file).

  Returns:
    A dict keyed by renamed class name. Each value is a two-item list
    [original_class_name, member_mappings], where member_mappings maps a
    renamed member signature to its original signature. Input without any
    class line yields an empty dict.

  Raises:
    ValueError: if a member line appears before the first class line.
  """
  mapping = {}
  member_mappings = None
  for line in mappingFile:
    line = line.strip()
    # Blank lines carry no mapping. Lines starting with '#' are skipped as
    # comments; they have no ' -> ' separator and could not be parsed anyway.
    if not line or line.startswith('#'):
      continue
    if is_new_class(line):
      original_class_name, renamed_class_name = parse_mapping_line(line)
      # Register the entry right away; member lines that follow fill in the
      # same dict object, so nothing needs flushing at the end.
      member_mappings = {}
      mapping[renamed_class_name] = [original_class_name, member_mappings]
    else:
      if member_mappings is None:
        raise ValueError('Member line before any class line: %r' % line)
      original_member_name, renamed_member_name = parse_mapping_line(line)
      member_mappings[renamed_member_name] = original_member_name

  return mapping
| + |
def strip_comments(string):
  """Removes Java-style comments from a piece of source text.

  Args:
    string: Source text, either a single line or several lines.

  Returns:
    The text with every /*COMMENT*/ block and every //COMMENT tail removed.
    Line breaks that end a // comment are kept.
  """
  # Remove all occurrences of multiline comments (/*COMMENT*/). DOTALL lets
  # '.' cross line breaks so a block spanning several lines is matched.
  string = re.sub(r'/\*.*?\*/', '', string, flags=re.DOTALL)
  # Remove all occurrences of single line comments (//COMMENT). MULTILINE
  # makes '$' match at the end of every line, not just the end of the text.
  string = re.sub(r'//.*?$', '', string, flags=re.MULTILINE)
  return string
| + |
| + |
def strip_quotes(string):
  """Deletes quoted literals, delimiters included, from a string.

  Double-quoted spans are removed first, scanning left to right; then the
  same is done for single-quoted spans in what remains. A quote character
  without a partner is left in place.

  Args:
    string: The text to clean.

  Returns:
    The text with every paired "..." and '...' span removed.
  """
  # A negated character class also matches newlines, so a span may cross
  # line breaks just like the find()-based scan would.
  without_double_quoted = re.sub(r'"[^"]*"', '', string)
  return re.sub(r"'[^']*'", '', without_double_quoted)
| + |
| + |
def remove_qualifiers(string_tokens):
  """Drops the leading run of Java modifier keywords from a token list.

  Only modifiers at the front are removed; a modifier that appears after the
  first non-modifier token is kept.

  Args:
    string_tokens: List of tokens from one declaration line.

  Returns:
    The tokens from the first non-modifier onwards. When nothing needs
    removing the input object itself is returned.
  """
  if not string_tokens:
    return string_tokens

  skippable = ('public', 'protected', 'private', 'final', 'static', 'abstract',
               'volatile', 'native', 'enum')
  first_kept = 0
  total = len(string_tokens)
  while first_kept < total and string_tokens[first_kept] in skippable:
    first_kept += 1

  if first_kept == 0:
    return string_tokens
  return string_tokens[first_kept:]
| + |
def get_line_tokens(line):
  """Tokenizes one dextra output line, ignoring comments and modifiers.

  Args:
    line: A line of dextra output.

  Returns:
    The list of tokens left after comments are stripped and leading
    qualifiers are removed.
  """
  # A token is a run of word characters (alphanumerics and underscore) plus
  # any of '$', '<', '>', '{', '}', '[', ']' and '.'.
  token_pattern = r'[\w\$\.<>\{\}\[\]]+'
  uncommented = strip_comments(line)
  return remove_qualifiers(re.findall(token_pattern, uncommented))
| + |
| + |
def is_class_definition(line_tokens):
  """Tells whether a tokenized line starts a class declaration.

  Args:
    line_tokens: Tokens of one line, possibly empty.

  Returns:
    True if the first token is 'class'; False otherwise, including for an
    empty token list (always a bool, never the list itself).
  """
  return bool(line_tokens) and line_tokens[0] == 'class'
| + |
| + |
def is_end_of_class_definition(line_tokens):
  """Tells whether a tokenized line closes a class header.

  Args:
    line_tokens: Tokens of one line, possibly empty.

  Returns:
    True if the last token is '{'; False otherwise, including for an empty
    token list (always a bool, never the list itself).
  """
  return bool(line_tokens) and line_tokens[-1] == '{'
| + |
| + |
def is_end_of_class(line_tokens):
  """Tells whether a tokenized line closes a class body.

  Args:
    line_tokens: Tokens of one line, possibly empty.

  Returns:
    True if the last token is '}'; False otherwise, including for an empty
    token list (always a bool, never the list itself).
  """
  return bool(line_tokens) and line_tokens[-1] == '}'
| + |
| + |
def type_lookup(renamed_type, mapping_dict):
  """Translates a possibly-renamed type name back to its original name.

  Square brackets around the name are set aside before the lookup and
  appended to the result afterwards, so array types keep their '[]' markers.

  Args:
    renamed_type: Type token as it appears in the dex dump, e.g. 'a.b[]'.
    mapping_dict: Dict keyed by renamed class name whose values hold the
        original class name at index 0.

  Returns:
    The original type name when the bare name is a key of mapping_dict,
    otherwise the bare name itself; in both cases followed by the brackets.
  """
  bare_name = renamed_type.strip('[]')
  brackets = renamed_type.replace(bare_name, '')

  if bare_name not in mapping_dict:
    return bare_name + brackets
  return mapping_dict[bare_name][0] + brackets
| + |
| + |
def get_member_identifier(line_tokens, mapping_dict, renamed_class_name,
                          is_function):
  """Recovers the original signature of a member seen in the dex dump.

  The renamed signature is rebuilt as '<type> <name>' (plus '(<arg types>)'
  for functions) with every type translated back through mapping_dict, and
  is then looked up in the member table of the enclosing class.

  Args:
    line_tokens: Tokens of the declaration: type, name, then argument types.
    mapping_dict: Dict keyed by renamed class name; each value holds the
        original class name at index 0 and the member table at index 1.
    renamed_class_name: Renamed name of the class that declares the member.
    is_function: Whether the declaration is a function, in which case the
        remaining tokens are treated as its argument types.

  Returns:
    The real type + real identifier + any real function args (if
    applicable), as stored in the class's member table.

  Raises:
    KeyError: if the rebuilt signature is not in the member table. Details
        are written to stderr first.
  """
  assert len(line_tokens) > 1
  assert renamed_class_name in mapping_dict
  mapping_entry = mapping_dict[renamed_class_name][1]

  real_type = type_lookup(line_tokens[0], mapping_dict)
  # Keep only the part of the name token before any '='.
  renamed_name_token = line_tokens[1].partition('=')[0]

  function_args = ''
  if is_function:
    function_args = '(%s)' % ','.join(
        type_lookup(token, mapping_dict) for token in line_tokens[2:])

  renamed_member_identifier = (real_type + ' ' + renamed_name_token
                               + function_args)
  if renamed_member_identifier not in mapping_entry:
    # Diagnostics go to stderr so they do not mix with the diff on stdout.
    sys.stderr.write(
        'No mapping entry for member %r of proguarded class %s\n'
        'Known renamed members: %s\n'
        'Definition line tokens: %s\n'
        % (renamed_member_identifier, renamed_class_name,
           sorted(mapping_entry), line_tokens))
    raise KeyError(renamed_member_identifier)

  return mapping_entry[renamed_member_identifier]
| + |
def get_class_names(line_tokens, mapping_dict):
  """Extracts the renamed and original names from a class declaration line.

  Args:
    line_tokens: Tokens of the declaration; the class name is the second one.
    mapping_dict: Dict keyed by renamed class name whose values hold the
        original class name at index 0.

  Returns:
    A (renamed_class_name, original_class_name) tuple.
  """
  assert len(line_tokens) > 1
  renamed_class_name = line_tokens[1]
  assert renamed_class_name in mapping_dict
  original_class_name = mapping_dict[renamed_class_name][0]
  return renamed_class_name, original_class_name
| + |
| + |
def is_line_function_definition(line):
  """Tells whether a dex dump line declares a function.

  Comments and quoted literals are removed first so that parentheses inside
  them are not counted.

  Args:
    line: A line of dextra output.

  Returns:
    True if, after cleaning, both '(' and ')' first occur past column 0.
  """
  code_only = strip_quotes(strip_comments(line))
  # find() yields -1 when the character is absent, so requiring the smaller
  # of the two positions to be > 0 means both are present and not at column 0.
  return min(code_only.find('('), code_only.find(')')) > 0
| + |
def build_mapped_dex_dict(dextra_file, mapping_dict):
  """Parses dextra output into a table of original class and member names.

  Expects data from dextra -j -m -f.

  Args:
    dextra_file: An iterable of dextra output lines (e.g. an open file).
    mapping_dict: Dict keyed by renamed class name; each value holds the
        original class name at index 0 and the member table at index 1. It
        is not modified.

  Returns:
    Dictionary mapping original class name to the list of its members'
    original signatures.
  """
  # Work on a shallow copy so the caller's table is left untouched.
  # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some
  # reason dextra shortens boolean to bool.
  mapping_dict = dict(mapping_dict)
  mapping_dict['bool'] = ['boolean', {}]
  dex_dict = {}
  current_entry = []
  reading_class_header = True
  unmatched_string = False
  renamed_class_name = None
  real_class_name = None

  for line in dextra_file:
    # Accounting for multi line strings: an odd number of double quotes
    # opens or closes one, and everything inside it is skipped.
    if line.count('"') % 2:
      unmatched_string = not unmatched_string
      continue
    if unmatched_string:
      continue

    line_tokens = get_line_tokens(line)
    if is_class_definition(line_tokens):
      reading_class_header = True
      renamed_class_name, real_class_name = get_class_names(line_tokens,
                                                            mapping_dict)
    # Not an elif: a class line that also ends in '{' finishes its own header.
    if is_end_of_class_definition(line_tokens):
      reading_class_header = False
      continue
    if is_end_of_class(line_tokens):
      # A closing brace seen before any class header has nothing to record.
      if real_class_name is not None:
        dex_dict[real_class_name] = current_entry
      current_entry = []
      continue

    if not reading_class_header and line_tokens:
      is_function = is_line_function_definition(line)
      member = get_member_identifier(line_tokens, mapping_dict,
                                     renamed_class_name, is_function)
      current_entry.append(member)

  return dex_dict
| + |
| + |
def diff_dex_dicts(dex_base, dex_new):
  """Describes the class and member differences between two dex tables.

  Neither input is modified, and the output order is deterministic: classes
  of dex_base in sorted order first, then classes only present in dex_new,
  with members sorted inside each class entry.

  Args:
    dex_base: Dict mapping class name to a list of member signatures.
    dex_new: Dict of the same shape to compare against dex_base.

  Returns:
    A list of strings. A class present in both with differing members yields
    the class name followed by '- member' lines (only in base) and
    '+ member' lines (only in new), joined by newlines. A class missing from
    dex_new yields '-class NAME'; one missing from dex_base yields
    '+class NAME'. Classes with equal member sets yield nothing.
  """
  diffs = []
  for key in sorted(dex_base):
    if key not in dex_new:
      # Class not found in new
      diffs.append('-class ' + key)
      continue

    # Class in both: compare as sets, so order and duplicates are ignored.
    base_members = set(dex_base[key])
    new_members = set(dex_new[key])
    if base_members == new_members:
      continue

    lines = [key]
    # Base has stuff the new one doesn't
    lines.extend('- ' + member
                 for member in sorted(base_members - new_members))
    # New has stuff the base one doesn't
    lines.extend('+ ' + member
                 for member in sorted(new_members - base_members))
    diffs.append('\n'.join(lines))

  # Classes in new that base never mentions
  for key in sorted(dex_new):
    if key not in dex_base:
      diffs.append('+class ' + key)

  return diffs
| + |
| + |
def main(unused_argv):
  """Command-line entry point: prints the differences between two dex dumps.

  Args:
    unused_argv: Despite the name (kept so existing callers keep working),
        this list is used. It must hold exactly four paths, in order: base
        mapping file, base dextra output, new mapping file, new dextra output.

  Returns:
    0 after printing the diff (one entry per print call, nothing when the
    dumps match), or 1 when the argument count is wrong; in that case a
    usage line is written to stderr.
  """
  if len(unused_argv) != 4:
    sys.stderr.write('Usage: python dexdiffer.py mappingfile1 output1.dextra '
                     'mappingfile2 output2.dextra\n')
    return 1

  (base_mapping_path, base_dextra_path,
   new_mapping_path, new_dextra_path) = unused_argv

  with open(base_mapping_path, 'r') as f:
    mapping_base = read_mapping_dict(f)
  with open(base_dextra_path, 'r') as f:
    dex_base = build_mapped_dex_dict(f, mapping_base)
  with open(new_mapping_path, 'r') as f:
    mapping_new = read_mapping_dict(f)
  with open(new_dextra_path, 'r') as f:
    dex_new = build_mapped_dex_dict(f, mapping_new)

  for diff in diff_dex_dicts(dex_base, dex_new):
    # Single-argument call form behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print(diff)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
| + |