Chromium Code Reviews

Unified Diff: tools/binary_size/run_binary_size_analysis.py

Issue 302443006: binary_size: Avoid creating nodes with many thousand children. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Split (no path) | Rebased to newer master | Created 6 years, 6 months ago
Index: tools/binary_size/run_binary_size_analysis.py
diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py
index f4dad625868b64b765115205ac8c36a1543fa69a..f06884f8dafe1f7ba54c8586d18882c0e0ec5227 100755
--- a/tools/binary_size/run_binary_size_analysis.py
+++ b/tools/binary_size/run_binary_size_analysis.py
@@ -39,6 +39,25 @@ sys.path.append(elf_symbolizer_path)
import symbols.elf_symbolizer as elf_symbolizer # pylint: disable=F0401
+# Node dictionary keys. These are output in the JSON read by the webapp,
+# so keep them short to save file size.
+# Note: If these change, the webapp must also change.
+NODE_TYPE_KEY = 'k'
+NODE_NAME_KEY = 'n'
+NODE_CHILDREN_KEY = 'children'
+NODE_SYMBOL_TYPE_KEY = 't'
+NODE_SYMBOL_SIZE_KEY = 'value'
+NODE_MAX_DEPTH_KEY = 'maxDepth'
+NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'
+
+# The display name of the bucket where we put symbols without a path.
+NAME_NO_PATH_BUCKET = '(No Path)'
+
+# Try to keep data buckets smaller than this to avoid killing the
+# graphing lib.
+BIG_BUCKET_LIMIT = 3000
+
+
# TODO(andrewhayden): Only used for legacy reports. Delete.
def FormatBytes(byte_count):
  """Pretty-print a number of bytes."""
@@ -63,41 +82,101 @@ def SymbolTypeToHuman(symbol_type):
def _MkChild(node, name):
-  child = node['children'].get(name)
+  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
-    child = {'n': name, 'children': {}}
-    node['children'][name] = child
+    child = {NODE_NAME_KEY: name,
+             NODE_CHILDREN_KEY: {}}
+    node[NODE_CHILDREN_KEY][name] = child
  return child
+
+def SplitNoPathBucket(node):
+  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
+  handle. Split it into sub-buckets in that case."""
+  root_children = node[NODE_CHILDREN_KEY]
+  if NAME_NO_PATH_BUCKET in root_children:
+    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
+    old_children = no_path_bucket[NODE_CHILDREN_KEY]
+    count = 0
+    for symbol_type, symbol_bucket in old_children.iteritems():
+      count += len(symbol_bucket[NODE_CHILDREN_KEY])
+    if count > BIG_BUCKET_LIMIT:
+      new_children = {}
+      no_path_bucket[NODE_CHILDREN_KEY] = new_children
+      current_bucket = None
+      index = 0
+      for symbol_type, symbol_bucket in old_children.iteritems():
+        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
+          if index % BIG_BUCKET_LIMIT == 0:
+            group_no = (index / BIG_BUCKET_LIMIT) + 1
+            current_bucket = _MkChild(no_path_bucket,
+                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
+                                                          group_no))
+            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
+            node[NODE_TYPE_KEY] = 'p' # p for path
+          index += 1
+          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
+          AddSymbolIntoFileNode(current_bucket, symbol_type,
+                                symbol_name, symbol_size)
+
+
def MakeChildrenDictsIntoLists(node):
  largest_list_len = 0
-  if 'children' in node:
-    largest_list_len = len(node['children'])
+  if NODE_CHILDREN_KEY in node:
+    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
-    for child in node['children'].itervalues():
+    for child in node[NODE_CHILDREN_KEY].itervalues():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
-    node['children'] = child_list
+    node[NODE_CHILDREN_KEY] = child_list
  return largest_list_len
+def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
+  """Puts a symbol into the file path node |node|.
+  Returns the number of levels added to the tree, which is always 2."""
+
+  # 'node' is the file node; the first step is to find its symbol-type bucket.
+  node[NODE_LAST_PATH_ELEMENT_KEY] = True
+  node = _MkChild(node, symbol_type)
+  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
+  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
+  node[NODE_TYPE_KEY] = 'b' # b for bucket
+
+  # 'node' is now the symbol-type bucket. Make the child entry.
+  node = _MkChild(node, symbol_name)
+  if NODE_CHILDREN_KEY in node:
+    if node[NODE_CHILDREN_KEY]:
+      logging.warning('A container node used as symbol for %s.' % symbol_name)
+    # This is going to be used as a leaf so no use for child list.
+    del node[NODE_CHILDREN_KEY]
+  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
+  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
+  node[NODE_TYPE_KEY] = 's' # s for symbol
+
+  return 2 # Depth of the added subtree.
+
+
def MakeCompactTree(symbols):
-  result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0}
+  result = {NODE_NAME_KEY: '/',
+            NODE_CHILDREN_KEY: {},
+            NODE_TYPE_KEY: 'p',
+            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
-      symbol_type = '@' # hack to categorize these separately
+      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path:
      file_path = os.path.normpath(file_path)
      seen_symbol_with_path = True
    else:
-      file_path = '(No Path)'
+      file_path = NAME_NO_PATH_BUCKET
    if file_path.startswith('/'):
      file_path = file_path[1:]
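
As a worked example of the splitting rule in SplitNoPathBucket above, the standalone sketch below (not part of the patch) reproduces only the subgroup numbering. The // operator stands in for the Python 2 integer division used in the patch, and the 7500-symbol count is hypothetical.

from collections import Counter

BIG_BUCKET_LIMIT = 3000  # same threshold as in the patch

def subgroup_name(index):
  # Mirrors the '%s subgroup %d' naming: the i-th pathless symbol (counting
  # across all symbol-type buckets) lands in subgroup (i // 3000) + 1.
  return '(No Path) subgroup %d' % ((index // BIG_BUCKET_LIMIT) + 1)

counts = Counter(subgroup_name(i) for i in range(7500))
for name in sorted(counts):
  print('%s: %d' % (name, counts[name]))
# (No Path) subgroup 1: 3000
# (No Path) subgroup 2: 3000
# (No Path) subgroup 3: 1500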
@@ -112,36 +191,22 @@ def MakeCompactTree(symbols):
        continue
      depth += 1
      node = _MkChild(node, path_part)
-      assert not 'k' in node or node['k'] == 'p'
-      node['k'] = 'p' # p for path
-
-    # 'node' is now the file node. Find the symbol-type bucket.
-    node['lastPathElement'] = True
-    node = _MkChild(node, symbol_type)
-    assert not 'k' in node or node['k'] == 'b'
-    node['t'] = symbol_type
-    node['k'] = 'b' # b for bucket
-    depth += 1
-
-    # 'node' is now the symbol-type bucket. Make the child entry.
-    node = _MkChild(node, symbol_name)
-    if 'children' in node:
-      if node['children']:
-        logging.warning('A container node used as symbol for %s.' % symbol_name)
-      # This is going to be used as a leaf so no use for child list.
-      del node['children']
-    node['value'] = symbol_size
-    node['t'] = symbol_type
-    node['k'] = 's' # s for symbol
-    depth += 1
-    result['maxDepth'] = max(result['maxDepth'], depth)
+      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
+      node[NODE_TYPE_KEY] = 'p' # p for path
+
+    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
+    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)
  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')
+  # The (no path) bucket can be extremely large if we failed to get
+  # path information. Split it into subgroups if needed.
+  SplitNoPathBucket(result)
+
  largest_list_len = MakeChildrenDictsIntoLists(result)
-  if largest_list_len > 1000:
+  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.' % largest_list_len)
  return result
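
To show how the patched MakeCompactTree is driven end to end, here is a hypothetical snippet, assuming the functions above are in scope (for example, pasted into the module or run in an interactive session). The symbol tuples are invented; in the real tool they come from the ELF symbolizer pipeline earlier in this script.

import json

# (symbol_name, symbol_type, symbol_size, file_path) tuples, as consumed by
# MakeCompactTree. The second entry has no path, so it lands in '(No Path)'.
fake_symbols = [
    ('Foo::Bar()', 't', 48, '/src/base/foo.cc'),
    ('kLookupTable', 'r', 256, None),
]
tree = MakeCompactTree(fake_symbols)
print(json.dumps(tree))  # roughly the payload the webapp reads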