Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Unified Diff: tools/android/dexdiffer/dexdiffer.py

Issue 2057323002: Dexdiffer scripts (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/android/dexdiffer/OWNERS ('k') | tools/android/dexdiffer/dexdiffer_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/android/dexdiffer/dexdiffer.py
diff --git a/tools/android/dexdiffer/dexdiffer.py b/tools/android/dexdiffer/dexdiffer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f990910f773c19a262fd631aa04ab0607e1b44de
--- /dev/null
+++ b/tools/android/dexdiffer/dexdiffer.py
@@ -0,0 +1,277 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Tool to diff 2 dex files that have been proguarded.
+
+To use this tool, first get dextra. http://newandroidbook.com/tools/dextra.html
+Then use the dextra binary on a classes.dex file like so:
+ dextra_binary -j -f -m classes.dex > output.dextra
+Do this for both the dex files you want to compare. Then, take the appropriate
+proguard mapping files used to generate those dex files, and use this script:
+ python dexdiffer.py mappingfile1 output1.dextra mappingfile2 output2.dextra
+"""
+
+import re
+import sys
+
agrieve 2016/06/13 16:57:01 Two blank lines between top-level functions
smaier 2016/06/13 18:57:16 Done.
+# Returns True when |line| ends with ':'. In a proguard mapping file that is
+# the shape of a class-level line such as
+# 'android.support.v8.MenuPopupHelper -> android.support.v8.v:'.
+def is_new_class(line):
agrieve 2016/06/13 16:57:01 naming convention is def _IsNewClass()
smaier 2016/06/13 18:57:15 Done.
+ return line.endswith(':')
+
+# Expects lines like one of these 3:
+# 'android.support.v8.MenuPopupHelper -> android.support.v8.v:'
+# ' android.view.LayoutInflater mInflater -> d'
+# ' 117:118:void setForceShowIcon(boolean) -> b'
+# Those three examples would return
+# 'android.support.v8.MenuPopupHelper', 'android.support.v8.v'
+# 'android.view.LayoutInflater mInflater', 'android.view.LayoutInflater d'
+# 'void setForceShowIcon(boolean)', 'void b(boolean)'
+# The result is always an (original, renamed) pair of strings. For members the
+# renamed half is rebuilt as <type prefix> + <new name> + <argument list>, with
+# the type prefix and argument list copied from the original half.
+# Raises IndexError when |line| contains no ' -> ' separator.
+# The member examples assume leading whitespace was already removed, which
+# read_mapping_dict does before calling this function.
+def parse_mapping_line(line):
+ if line.endswith(':'):
agrieve 2016/06/13 16:57:01 nit: line.rstrip(':')
smaier 2016/06/13 18:57:15 Done.
+ # Removing any trailing colons
+ line = line[:-1]
+
+ # Stripping any line number denotations
+ line = re.sub(r'\d+:\d+:', '', line)
+ line = re.sub(r'\):\d+', ')', line)
+
+ # Left of ' -> ' is the original declaration, right of it the new name.
+ split_string = line.split(' -> ')
+ original_name, new_name = split_string[0], split_string[1]
agrieve 2016/06/13 16:57:01 original_name, new_name = line.split(' -> ')
smaier 2016/06/13 18:57:15 Done.
+
+ # The type prefix is everything up to and including the first space.
+ type_string = ''
+ if original_name.find(' ') >= 0:
agrieve 2016/06/13 16:57:01 if ' ' in original_name, or cache result in a vari
smaier 2016/06/13 18:57:16 Done.
+ type_string = original_name[:original_name.find(' ') + 1]
+
+ arguments_string = ''
+ # NOTE: str.find() returns -1 (truthy) when the character is missing, so this
+ # test does not detect missing brackets. When neither bracket is present the
+ # slice below is [-1:0], which is empty, so arguments_string stays ''.
+ if original_name.find('(') and original_name.find(')'):
agrieve 2016/06/13 16:57:01 this evaluates to true when no brackets exist. Mig
smaier 2016/06/13 18:57:17 Done.
+ arguments_string = original_name[
+ original_name.find('('):original_name.find(')') + 1]
+
+ return original_name, type_string + new_name + arguments_string
+
+# Builds the lookup table for one proguard mapping file.
+# |mappingFile| is iterated line by line and each line is stripped of
+# surrounding whitespace. The result maps every renamed class name to a
+# two-item list [original_class_name, member_mappings], where member_mappings
+# maps each renamed member (as rebuilt by parse_mapping_line) to its original
+# declaration.
+# NOTE(review): current_entry is only assigned once a class line has been
+# seen, so an input without any class line fails with UnboundLocalError at the
+# final assignment.
+def read_mapping_dict(mappingFile):
agrieve 2016/06/13 16:57:01 mappingFile->mapping_file
smaier 2016/06/13 18:57:16 Done.
+ mapping = {}
+ renamed_class_name = ''
+ original_class_name = ''
+ for line in mappingFile:
+ line = line.strip()
+ if is_new_class(line):
+ # Store the class collected so far before starting on the next one.
+ if renamed_class_name:
+ mapping[renamed_class_name] = current_entry
+
+ member_mappings = {}
+ original_class_name, renamed_class_name = parse_mapping_line(line)
+ current_entry = [original_class_name, member_mappings]
+ else:
+ # Member line: keyed by the renamed form so dex output can be looked up.
+ original_member_name, renamed_member_name = parse_mapping_line(line)
+ member_mappings[renamed_member_name] = original_member_name
+
+ # The last class in the file has no following class line to flush it.
+ mapping[renamed_class_name] = current_entry
+ return mapping
+
+# Removes Java-style comments from |string| and returns the remaining text.
+# Both /* ... */ comments (which may span several lines) and // comments are
+# removed. The line terminator that follows a // comment is kept.
+def strip_comments(string):
+  # Remove all occurrences of multiline comments (/*COMMENT*/). DOTALL lets '.'
+  # cross newlines so that a comment spanning several lines is matched.
+  string = re.sub(r'/\*.*?\*/', "", string, flags=re.DOTALL)
+  # Remove all occurrences of single line comments (//COMMENT). MULTILINE makes
+  # '$' match at the end of every line; without it only a comment on the last
+  # line of a multi-line string would be removed.
+  string = re.sub(r'//.*?$', "", string, flags=re.MULTILINE)
+  return string
+
+
+# Removes quoted text from |string| and returns the result: first every
+# "..." pair (quotes included), then every '...' pair. Pairs are taken from
+# left to right; a quote character without a partner is left in place.
+def strip_quotes(string):
agrieve 2016/06/13 16:57:00 nit: might be more readable to use re here: retu
smaier 2016/06/13 18:57:17 Done.
+ # i is the opening quote, j the next quote after it (-1 when there is none).
+ i = string.find('"')
+ j = string.find('"', i+1)
+ while j != -1:
+ string = string[:i] + string[j+1:]
+ i = string.find('"')
+ j = string.find('"', i+1)
+ # Same procedure for single-quoted text.
+ i = string.find('\'')
+ j = string.find('\'', i+1)
+ while j != -1:
+ string = string[:i] + string[j+1:]
+ i = string.find('\'')
+ j = string.find('\'', i+1)
+ return string
+
+
+# Drops the leading run of tokens that appear in |qualifiers| and returns the
+# remaining tokens. Only the front of the list is examined: a listed word that
+# appears after the first non-listed token is kept.
+def remove_qualifiers(string_tokens):
+ qualifiers = ['public', 'protected', 'private', 'final', 'static', 'abstract',
agrieve 2016/06/13 16:57:01 nit: make this a set and put it at the top of the
smaier 2016/06/13 18:57:15 Done.
+ 'volatile', 'native', 'enum']
+ # Slicing builds a new list each time, so the caller's list is not modified.
+ while string_tokens and string_tokens[0] in qualifiers:
+ string_tokens = string_tokens[1:]
+ return string_tokens
+
+# Splits one line of dextra output into tokens. Comments are removed first
+# and leading qualifiers are dropped from the resulting token list.
+def get_line_tokens(line):
+  # A token is a run of \w characters (letters, digits, underscore) plus any
+  # of '$', '.', '<', '>', '{', '}', '[' and ']'.
+  token_pattern = r'[\w\$\.<>\{\}\[\]]+'
+  uncommented = strip_comments(line)
+  return remove_qualifiers(re.findall(token_pattern, uncommented))
+
+
+# Tells whether |line_tokens| begins a class declaration, i.e. whether its
+# first token is 'class'. An empty token list is handed back as is (falsy).
+def is_class_definition(line_tokens):
+  if not line_tokens:
+    return line_tokens
+  return line_tokens[0] == 'class'
+
+
+# Tells whether |line_tokens| finishes a class header, i.e. whether its last
+# token is '{'. An empty token list is handed back as is (falsy).
+def is_end_of_class_definition(line_tokens):
+  if not line_tokens:
+    return line_tokens
+  return line_tokens[-1] == '{'
+
+
+# Tells whether |line_tokens| closes a class body, i.e. whether its last token
+# is '}'. An empty token list is handed back as is (falsy).
+def is_end_of_class(line_tokens):
+  if not line_tokens:
+    return line_tokens
+  return line_tokens[-1] == '}'
+
+
+# Translates a (possibly renamed) type name back to its original name.
+# '[' and ']' characters at either end of |renamed_type| are stripped before
+# the lookup in |mapping_dict|, and what is left of |renamed_type| after
+# removing the stripped name (e.g. '[]' for an array) is appended to the
+# result. A name that is not a key of |mapping_dict| is kept as it is.
+def type_lookup(renamed_type, mapping_dict):
+ renamed_type_stripped = renamed_type.strip('[]')
+ postfix = renamed_type.replace(renamed_type_stripped, '')
agrieve 2016/06/13 16:57:01 postfix -> suffix
smaier 2016/06/13 18:57:16 Typically modifiers/operators after a token are re
+
+ if renamed_type_stripped in mapping_dict:
+ # Entry layout is [original_class_name, member_mappings].
+ real_type = mapping_dict[renamed_type_stripped][0]
+ else:
+ real_type = renamed_type_stripped
+
+ return real_type + postfix
+
+
+# Returns the original declaration of one class member.
+# |line_tokens| holds the tokens of the member's line: the type first, then
+# the name, then (for functions) the argument types. The renamed declaration
+# is rebuilt as '<original type> <renamed name>' plus, when |is_function| is
+# true, '(<original arg types joined by commas>)', and that string is looked
+# up in the member mappings of |renamed_class_name|.
+# Asserts that there are at least two tokens and that |renamed_class_name| is
+# a key of |mapping_dict|. When the rebuilt declaration is unknown, debugging
+# details are printed and the final lookup raises KeyError.
+def get_member_identifier(line_tokens, mapping_dict, renamed_class_name,
+ is_function):
+ assert len(line_tokens) > 1
+ assert renamed_class_name in mapping_dict
+ mapping_entry = mapping_dict[renamed_class_name][1]
+
+ renamed_type = line_tokens[0]
+ real_type = type_lookup(renamed_type, mapping_dict)
+
+ # Keep only the part of the name token that precedes any '='.
+ renamed_name_token = line_tokens[1]
+ renamed_name_token, _, _ = renamed_name_token.partition('=')
+
+ function_args = ''
+ if is_function:
+ function_args += '('
+ for token in line_tokens[2:]:
+ function_args += type_lookup(token, mapping_dict) + ','
+ # Remove trailing ','
+ if function_args.endswith(','):
agrieve 2016/06/13 16:57:01 rstrip()
smaier 2016/06/13 18:57:15 Done.
+ function_args = function_args[:-1]
+ function_args += ')'
+
+ renamed_member_identifier = (real_type + ' ' + renamed_name_token
+ + function_args)
+ # Print context for the KeyError that the lookup below is about to raise.
+ if renamed_member_identifier not in mapping_entry:
+ print 'Proguarded class which caused the issue:', renamed_class_name
+ print 'Key supposed to be in this dict:', mapping_entry
+ print 'Definition line tokens:', line_tokens
+
+ # This will be the real type + real_identifier + any real function args (if
+ # applicable)
+ return mapping_entry[renamed_member_identifier]
+
+# Returns the pair (renamed_class_name, original_class_name) for the tokens of
+# a class declaration line. The renamed name is the second token; the
+# original name is the first item of its |mapping_dict| entry.
+# Asserts that a second token exists and that it is a key of |mapping_dict|.
+def get_class_names(line_tokens, mapping_dict):
+  assert len(line_tokens) > 1
+  renamed_class_name = line_tokens[1]
+  assert renamed_class_name in mapping_dict
+  original_class_name = mapping_dict[renamed_class_name][0]
+  return renamed_class_name, original_class_name
+
+
+# Tells whether |line| looks like a function definition: after comments and
+# quoted text are removed, both '(' and ')' must first occur somewhere past
+# the first character of the line.
+def is_line_function_definition(line):
+  code = strip_quotes(strip_comments(line))
+  if code.find('(') <= 0:
+    return False
+  return code.find(')') > 0
+
+# Expects data from dextra -j -m -f
+# Returns dictionary mapping class name to list of members
+# The keys are original (un-renamed) class names and the members are the
+# original declarations returned by get_member_identifier.
+# |dextra_file| is iterated line by line.
+# NOTE: |mapping_dict| is modified: a 'bool' entry is added to it.
+def build_mapped_dex_dict(dextra_file, mapping_dict):
+ # Have to add 'bool' -> 'boolean' mapping in dictionary, since for some reason
+ # dextra shortens boolean to bool.
+ mapping_dict['bool'] = ['boolean', {}]
+ dex_dict = {}
+ # Members collected for the class currently being read.
+ current_entry = []
+ # True from a 'class' line until a line whose last token is '{'.
+ reading_class_header = True
+ # True while inside a string literal that started on an earlier line.
+ unmatched_string = False
+
+ for line in dextra_file:
+ # Accounting for multi line strings
+ # A line with an odd number of '"' opens or closes such a string; that line
+ # and every line in between are skipped.
+ if line.count('"') % 2:
+ unmatched_string = not unmatched_string
+ continue
+ if unmatched_string:
+ continue
+
+ line_tokens = get_line_tokens(line)
+ if is_class_definition(line_tokens):
+ reading_class_header = True
+ renamed_class_name, real_class_name = get_class_names(line_tokens,
+ mapping_dict)
+ if is_end_of_class_definition(line_tokens):
+ reading_class_header = False
+ continue
+ # A line ending in '}' stores the collected members under the original class
+ # name and starts a fresh list.
+ if is_end_of_class(line_tokens):
+ dex_dict[real_class_name] = current_entry
+ current_entry = []
+ continue
+
+ # Any other non-empty line outside a class header is treated as a member.
+ if not reading_class_header and line_tokens:
+ is_function = is_line_function_definition(line)
+ member = get_member_identifier(line_tokens, mapping_dict,
+ renamed_class_name, is_function)
+ current_entry.append(member)
+
+ return dex_dict
+
+
+# Compares two {class name: member list} dictionaries and returns a list of
+# difference strings:
+#   '<class>' followed by '\n- <member>' and/or '\n+ <member>' parts for a
+#       class present in both whose member sets differ,
+#   '-class <class>' for a class found only in |dex_base|,
+#   '+class <class>' for a class found only in |dex_new|.
+# Members are compared as sets, so duplicates and ordering are ignored.
+# NOTE: |dex_new| is modified; every class that also exists in |dex_base| is
+# popped from it.
+def diff_dex_dicts(dex_base, dex_new):
+ diffs = []
+ for key, base_class_members in dex_base.iteritems():
+ if key in dex_new:
+ # Class in both
+ base_class_members_set = set(base_class_members)
+ # Removing from dex_new to have just those which only appear in dex_new
+ # left over.
+ new_class_members_set = set(dex_new.pop(key))
+ if (base_class_members_set.issubset(new_class_members_set)
agrieve 2016/06/13 16:57:01 could you just say: base_class_members_set == new_
smaier 2016/06/13 18:57:15 Done.
+ and new_class_members_set.issubset(base_class_members_set)):
+ # They are equal
+ continue
+ else:
+ # They are not equal
+ # One entry per class: the class name, then one line per differing member.
+ diff_string = key
+ if base_class_members_set.difference(new_class_members_set):
agrieve 2016/06/13 16:57:01 nit: store in a variable.
smaier 2016/06/13 18:57:16 Done.
+ # Base has stuff the new one doesn't
+ for diff in base_class_members_set.difference(new_class_members_set):
+ diff_string += '\n' + '- ' + diff
+ if new_class_members_set.difference(base_class_members_set):
+ # New has stuff the base one doesn't
+ for diff in new_class_members_set.difference(base_class_members_set):
+ diff_string += '\n' + '+ ' + diff
+ diffs.append(diff_string)
+ else:
+ # Class not found in new
+ diff_string = '-class ' + key
+ diffs.append(diff_string)
+ if dex_new:
+ # Classes in new that have yet to be hit by base
+ for key in dex_new:
+ diff_string = '+class ' + key
+ diffs.append(diff_string)
+
+ return diffs
+
+
+# Entry point. |unused_argv| must hold exactly four paths, in this order:
+# base mapping file, base dextra output, new mapping file, new dextra output.
+# Each difference reported by diff_dex_dicts is printed; nothing is printed
+# when no differences are found.
+def main(unused_argv):
+ assert(len(unused_argv) == 4)
agrieve 2016/06/13 16:57:00 this doesn't seem unused :P
smaier 2016/06/13 18:57:16 Done.
+
+ with open(unused_argv[0], "r") as f:
agrieve 2016/06/13 16:57:00 You should at least have a usage text and support
smaier 2016/06/13 18:57:16 Done.
+ mapping_base = read_mapping_dict(f)
+ # Each dextra dump is translated with the mapping of its own build.
+ with open(unused_argv[1], "r") as f:
+ dex_base = build_mapped_dex_dict(f, mapping_base)
+ with open(unused_argv[2], "r") as f:
+ mapping_new = read_mapping_dict(f)
+ with open(unused_argv[3], "r") as f:
+ dex_new = build_mapped_dex_dict(f, mapping_new)
+
+ diffs = diff_dex_dicts(dex_base, dex_new)
+ if diffs:
+ for diff in diffs:
+ print diff
+
+
+# Run only when executed as a script; the program name is dropped from argv.
+if __name__ == '__main__':
+ main(sys.argv[1:])
+
« no previous file with comments | « tools/android/dexdiffer/OWNERS ('k') | tools/android/dexdiffer/dexdiffer_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698