| Index: tools/gen-postmortem-metadata.py
|
| ===================================================================
|
| --- tools/gen-postmortem-metadata.py (revision 0)
|
| +++ tools/gen-postmortem-metadata.py (revision 0)
|
| @@ -0,0 +1,478 @@
|
| +#!/usr/bin/env python
|
| +
|
| +#
|
| +# Copyright 2012 the V8 project authors. All rights reserved.
|
| +# Redistribution and use in source and binary forms, with or without
|
| +# modification, are permitted provided that the following conditions are
|
| +# met:
|
| +#
|
| +# * Redistributions of source code must retain the above copyright
|
| +# notice, this list of conditions and the following disclaimer.
|
| +# * Redistributions in binary form must reproduce the above
|
| +# copyright notice, this list of conditions and the following
|
| +# disclaimer in the documentation and/or other materials provided
|
| +# with the distribution.
|
| +# * Neither the name of Google Inc. nor the names of its
|
| +# contributors may be used to endorse or promote products derived
|
| +# from this software without specific prior written permission.
|
| +#
|
| +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| +#
|
| +
|
| +#
|
| +# Emits a C++ file to be compiled and linked into libv8 to support postmortem
|
| +# debugging tools. Most importantly, this tool emits constants describing V8
|
| +# internals:
|
| +#
|
| +# v8dbg_type_CLASS__TYPE = VALUE Describes class type values
|
| +# v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields
|
| +# v8dbg_parent_CLASS__PARENT Describes class hierarchy
|
| +# v8dbg_frametype_NAME = VALUE Describes stack frame values
|
| +# v8dbg_off_fp_NAME = OFFSET Frame pointer offsets
|
| +# v8dbg_prop_NAME = OFFSET Object property offsets
|
| +# v8dbg_NAME = VALUE Miscellaneous values
|
| +#
|
| +# These constants are declared as global integers so that they'll be present in
|
| +# the generated libv8 binary.
|
| +#
|
| +
|
| +import re
|
| +import sys
|
| +
|
#
# Miscellaneous constants, tags, and masks used for object identification.
# Each entry pairs the suffix of an emitted "v8dbg_<name>" symbol with the
# C++ expression written as that symbol's value.
#
consts_misc = [
    dict(name='FirstNonstringType', value='FIRST_NONSTRING_TYPE'),

    dict(name='IsNotStringMask', value='kIsNotStringMask'),
    dict(name='StringTag', value='kStringTag'),
    dict(name='NotStringTag', value='kNotStringTag'),

    dict(name='StringEncodingMask', value='kStringEncodingMask'),
    dict(name='TwoByteStringTag', value='kTwoByteStringTag'),
    dict(name='AsciiStringTag', value='kAsciiStringTag'),

    dict(name='StringRepresentationMask', value='kStringRepresentationMask'),
    dict(name='SeqStringTag', value='kSeqStringTag'),
    dict(name='ConsStringTag', value='kConsStringTag'),
    dict(name='ExternalStringTag', value='kExternalStringTag'),

    dict(name='FailureTag', value='kFailureTag'),
    dict(name='FailureTagMask', value='kFailureTagMask'),
    dict(name='HeapObjectTag', value='kHeapObjectTag'),
    dict(name='HeapObjectTagMask', value='kHeapObjectTagMask'),
    dict(name='SmiTag', value='kSmiTag'),
    dict(name='SmiTagMask', value='kSmiTagMask'),
    dict(name='SmiValueShift', value='kSmiTagSize'),
    dict(name='PointerSizeLog2', value='kPointerSizeLog2'),

    # v8dbg_prop_* entries.
    dict(name='prop_idx_content',
         value='DescriptorArray::kContentArrayIndex'),
    dict(name='prop_idx_first',
         value='DescriptorArray::kFirstIndex'),
    dict(name='prop_type_field',
         value='FIELD'),
    dict(name='prop_type_first_phantom',
         value='MAP_TRANSITION'),
    dict(name='prop_type_mask',
         value='PropertyDetails::TypeField::kMask'),

    # v8dbg_off_fp_* entries.
    dict(name='off_fp_context',
         value='StandardFrameConstants::kContextOffset'),
    dict(name='off_fp_marker',
         value='StandardFrameConstants::kMarkerOffset'),
    dict(name='off_fp_function',
         value='JavaScriptFrameConstants::kFunctionOffset'),
    dict(name='off_fp_args',
         value='JavaScriptFrameConstants::kLastParameterOffset'),
]
|
| +
|
#
# The following useful fields are missing accessors, so we define fake ones.
# Each string is the argument list of a pretend ACCESSORS() invocation:
# "Class, field, type, offset-constant".
#
extras_accessors = [
    "HeapObject, map, Map, kMapOffset",
    "JSObject, elements, Object, kElementsOffset",
    "FixedArray, data, uintptr_t, kHeaderSize",
    "Map, instance_attributes, int, kInstanceAttributesOffset",
    "Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset",
    "Map, inobject_properties, int, kInObjectPropertiesOffset",
    "Map, instance_size, int, kInstanceSizeOffset",
    "HeapNumber, value, double, kValueOffset",
    "ConsString, first, String, kFirstOffset",
    "ConsString, second, String, kSecondOffset",
    "ExternalString, resource, Object, kResourceOffset",
    "SeqAsciiString, chars, char, kHeaderSize",
]
|
| +
|
#
# The following is a whitelist of classes we expect to find when scanning the
# source code. This list is not exhaustive, but it's still useful to identify
# when this script gets out of sync with the source. See load_objects().
#
expected_classes = [
    'ConsString',
    'FixedArray',
    'HeapNumber',
    'JSArray',
    'JSFunction',
    'JSObject',
    'JSRegExp',
    'JSValue',
    'Map',
    'Oddball',
    'Script',
    'SeqAsciiString',
    'SharedFunctionInfo',
]
|
| +
|
| +
|
#
# The following structures store high-level representations of the structures
# for which we're going to emit descriptive constants. They start out empty
# and are filled in by load_objects() and load_fields().
#
types = dict()          # all type names (a dict used as a set: name -> True)
typeclasses = dict()    # maps type names to corresponding class names
klasses = dict()        # known classes, including parents
fields = list()         # field declarations
|
| +
|
# Text written at the top of the generated C++ file. The single %s is filled
# with this script's own path (sys.argv[0]).
_header_template = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
 int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

'''

header = _header_template % (sys.argv[0],)
|
| +
|
# Text written at the very end of the generated file: a closing brace on its
# own line, surrounded by newlines.
footer = '\n}\n'
|
| +
|
#
# Loads class hierarchy and type information from "objects.h".
#
# The header is read from the path in sys.argv[2]. Results are stored in the
# module-level tables:
#
#     klasses        class name -> { 'parent': parent class name or None }
#     types          every name found in "enum InstanceType" (name -> True)
#     typeclasses    type name -> class name, for each type whose inferred
#                    class name was declared in the header
#
# If any entry of expected_classes ends up with no type mapped to it, one
# error line per missing class is printed and the script exits with status 1.
#
def load_objects():
    in_insttype = False
    enum_body = []

    #
    # The classes we're sure should be present. Entries are discarded as
    # types get mapped to them; whatever is left at the end is missing.
    #
    missing = set(expected_classes)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate the text of the entire InstanceType enum
    # definition and parse it later because it's easier to do so without
    # the embedded newlines. The "with" block guarantees that the file is
    # closed even if parsing fails part-way through.
    #
    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                enum_body.append(line)
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration. Names are stripped on both
    # sides and empty entries (e.g. after a trailing comma) are ignored so
    # that only real enumerator names are recorded.
    #
    for entry in ''.join(enum_body).split(','):
        typename = re.sub(r'\s*=.*', '', entry).strip()
        if typename:
            types[typename] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around. Despite all that, some types have no
    # corresponding class, and those are simply left unmapped.
    #
    for typename in types:
        cctype = _class_name_for_type(typename, klasses)
        if cctype is None or cctype not in klasses:
            continue

        typeclasses[typename] = cctype
        missing.discard(cctype)

    if missing:
        # Sorted so that the report is the same from run to run.
        for klass in sorted(missing):
            print('error: expected class "%s" not found' % klass)

        sys.exit(1)


#
# Returns the class name inferred for the instance type "typename", or None
# if the name does not end in "_TYPE". "known_classes" is the container of
# declared class names; it is only consulted to decide whether a string class
# name needs its encoding removed. The returned name is NOT guaranteed to be
# a member of known_classes.
#
def _class_name_for_type(typename, known_classes):
    #
    # Symbols and Strings are implemented using the same classes, and
    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    #
    usetype = typename.replace('SYMBOL_', 'STRING_')
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction"). Slicing with [:1] keeps an empty segment (from a
    # doubled underscore) from raising IndexError.
    #
    parts = usetype[:-len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    if not cctype.endswith('String'):
        return cctype

    #
    # Mapping string types is more complicated. Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii). In the simplest case,
    # both of these are explicit in both names, as in:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from
    # the type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present. If that doesn't
    # yield a valid class name, then we strip out the encoding.
    #
    has_representation = ('Cons' in cctype or 'External' in cctype or
                          'Sliced' in cctype)
    if not has_representation:
        if 'Ascii' in cctype:
            cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
        else:
            cctype = re.sub(r'String$', 'SeqString', cctype)

    if 'Ascii' not in cctype:
        cctype = re.sub(r'String$', 'TwoByteString', cctype)

    if cctype not in known_classes:
        cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype
|
| +
|
| +
|
#
# For a given macro call, pick apart the arguments and return an object
# describing the corresponding output constant. See load_fields().
#
# "call" is the full text of one invocation, from the macro name through the
# closing parenthesis, and may span several lines. Supported macros:
#
#     ACCESSORS(class, field, type, offset)
#     ACCESSORS_GCSAFE(class, field, type, offset)
#     SMI_ACCESSORS(class, field, offset)          (type is reported as SMI)
#
# Returns { 'name': 'class_<class>__<field>__<type>',
#           'value': '<class>::<offset>' }.
# Raises ValueError for any other macro name.
#
def parse_field(call):
    # Replace newlines with spaces. Strings are immutable, so this has to
    # build a new string rather than patch characters in place.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[:idx]

    # Drop the parentheses, then trim the ends so that whitespace right after
    # "(" or right before ")" doesn't leak into the first or last argument.
    rest = call[idx + 1:len(call) - 1].strip()
    args = re.split(r'\s*,\s*', rest)

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]
    elif kind == 'SMI_ACCESSORS':
        klass = args[0]
        field = args[1]
        dtype = 'SMI'
        offset = args[2]
    else:
        # An explicit raise (rather than assert) still fires under "python -O".
        raise ValueError('unsupported accessor macro: %r' % call)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset)
    }
|
| +
|
#
# Load field offset information from objects-inl.h.
#
# The file is read from the path in sys.argv[3]. One parse_field() result is
# appended to the module-level "fields" list for every accessor macro
# invocation found, followed by one for every entry of extras_accessors.
#
def load_fields():
    #
    # Each class's fields and the corresponding offsets are described in the
    # source by calls to macros like "ACCESSORS" (and friends). All we do
    # here is extract these macro invocations, taking into account that they
    # may span multiple lines and may contain nested parentheses. We also
    # call parse_field() to pick apart the invocation.
    #
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS']
    current = ''    # text of the invocation collected so far
    opens = 0       # parentheses currently open in "current"

    # The "with" block guarantees that the file is closed even if
    # parse_field() raises part-way through.
    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of an invocation that is still open.
                opens, end = _scan_parens(line, 0, opens)
                current += line[:end + 1]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here; flush the previous one.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                opens, end = _scan_parens(line, len(prefix), opens)
                current += line[:end + 1]

                # With the "(" appended, at most one prefix can match.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))


#
# Scans line[start:] while tracking parenthesis depth, beginning at "depth".
# Stops right after the character that brings the depth to zero, or at the
# end of the line. Returns (new depth, index of the last character scanned).
#
def _scan_parens(line, start, depth):
    end = start
    for end in range(start, len(line)):
        if line[end] == '(':
            depth += 1
        elif line[end] == ')':
            depth -= 1

        if depth == 0:
            break

    return depth, end
|
| +
|
#
# Emit a block of constants.
#
#     out       object whose write() method receives the generated text
#     consts    iterable of entries with 'name' and 'value' keys; any
#               iterable works (list, tuple, generator), not just sequences
#
# Writes one "int v8dbg_<name> = <value>;" line per entry, then a single
# empty line to end the block (also when there are no entries).
#
def emit_set(out, consts):
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))

    out.write('\n')
|
| +
|
#
# Emit the whole output file.
#
# The output path is taken from sys.argv[1]. The file consists of the header,
# then four blocks of constants (miscellaneous values, class type information,
# class hierarchy information, and field information), then the footer. The
# type and hierarchy blocks are emitted in sorted key order.
#
def emit_config():
    # open() exists on both Python 2 and 3 (the file() builtin is Python 2
    # only), and the "with" block flushes and closes the output on exit.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        # sorted() works on any mapping; dict.keys() has no sort() method on
        # Python 3.
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                # Classes with no recorded parent get no hierarchy constant.
                continue

            # The relationship is encoded in the symbol name itself, so the
            # value is always 0.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
|
| +
|
# Script entry point. Three path arguments are required: the output file,
# objects.h, and objects-inl.h. With fewer than that, print the usage line
# and exit with status 2; otherwise run the three stages in order.
if len(sys.argv) >= 4:
    load_objects()
    load_fields()
    emit_config()
else:
    print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
    sys.exit(2)
|
|
|