Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Unified Diff: tools/gen-postmortem-metadata.py

Issue 8803024: Optionally export metadata with libv8 to enable debuggers to inspect V8 state. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « build/common.gypi ('k') | tools/gyp/v8.gyp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/gen-postmortem-metadata.py
===================================================================
--- tools/gen-postmortem-metadata.py (revision 0)
+++ tools/gen-postmortem-metadata.py (revision 0)
@@ -0,0 +1,478 @@
+#!/usr/bin/env python
+
+#
+# Copyright 2012 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# Emits a C++ file to be compiled and linked into libv8 to support postmortem
+# debugging tools. Most importantly, this tool emits constants describing V8
+# internals:
+#
+# v8dbg_type_CLASS__TYPE = VALUE Describes class type values
+# v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields
+# v8dbg_parent_CLASS__PARENT Describes class hierarchy
+# v8dbg_frametype_NAME = VALUE Describes stack frame values
+# v8dbg_off_fp_NAME = OFFSET Frame pointer offsets
+# v8dbg_prop_NAME = OFFSET Object property offsets
+# v8dbg_NAME = VALUE Miscellaneous values
+#
+# These constants are declared as global integers so that they'll be present in
+# the generated libv8 binary.
+#
+
+import re
+import sys
+
+#
+# Miscellaneous constants, tags, and masks used for object identification.
+#
# Table of miscellaneous constants.  Each entry pairs the suffix used in the
# emitted symbol name ('name') with the C++ expression that supplies its
# value ('value').
consts_misc = [
    # Instance-type boundary.
    dict(name='FirstNonstringType', value='FIRST_NONSTRING_TYPE'),

    # String vs. non-string discrimination.
    dict(name='IsNotStringMask', value='kIsNotStringMask'),
    dict(name='StringTag', value='kStringTag'),
    dict(name='NotStringTag', value='kNotStringTag'),

    # String encoding.
    dict(name='StringEncodingMask', value='kStringEncodingMask'),
    dict(name='TwoByteStringTag', value='kTwoByteStringTag'),
    dict(name='AsciiStringTag', value='kAsciiStringTag'),

    # String representation.
    dict(name='StringRepresentationMask', value='kStringRepresentationMask'),
    dict(name='SeqStringTag', value='kSeqStringTag'),
    dict(name='ConsStringTag', value='kConsStringTag'),
    dict(name='ExternalStringTag', value='kExternalStringTag'),

    # Pointer tagging.
    dict(name='FailureTag', value='kFailureTag'),
    dict(name='FailureTagMask', value='kFailureTagMask'),
    dict(name='HeapObjectTag', value='kHeapObjectTag'),
    dict(name='HeapObjectTagMask', value='kHeapObjectTagMask'),
    dict(name='SmiTag', value='kSmiTag'),
    dict(name='SmiTagMask', value='kSmiTagMask'),
    dict(name='SmiValueShift', value='kSmiTagSize'),
    dict(name='PointerSizeLog2', value='kPointerSizeLog2'),

    # Property descriptors.
    dict(name='prop_idx_content', value='DescriptorArray::kContentArrayIndex'),
    dict(name='prop_idx_first', value='DescriptorArray::kFirstIndex'),
    dict(name='prop_type_field', value='FIELD'),
    dict(name='prop_type_first_phantom', value='MAP_TRANSITION'),
    dict(name='prop_type_mask', value='PropertyDetails::TypeField::kMask'),

    # Frame-pointer-relative offsets.
    dict(name='off_fp_context',
         value='StandardFrameConstants::kContextOffset'),
    dict(name='off_fp_marker',
         value='StandardFrameConstants::kMarkerOffset'),
    dict(name='off_fp_function',
         value='JavaScriptFrameConstants::kFunctionOffset'),
    dict(name='off_fp_args',
         value='JavaScriptFrameConstants::kLastParameterOffset'),
]
+
+#
+# The following useful fields are missing accessors, so we define fake ones.
+#
# Hand-written accessor descriptions.  Each row is (class, field, type,
# offset constant); the rows are joined into the comma-separated argument
# text of an ACCESSORS(...) invocation.
extras_accessors = list(map(', '.join, (
    ('HeapObject', 'map', 'Map', 'kMapOffset'),
    ('JSObject', 'elements', 'Object', 'kElementsOffset'),
    ('FixedArray', 'data', 'uintptr_t', 'kHeaderSize'),
    ('Map', 'instance_attributes', 'int', 'kInstanceAttributesOffset'),
    ('Map', 'instance_descriptors', 'int',
     'kInstanceDescriptorsOrBitField3Offset'),
    ('Map', 'inobject_properties', 'int', 'kInObjectPropertiesOffset'),
    ('Map', 'instance_size', 'int', 'kInstanceSizeOffset'),
    ('HeapNumber', 'value', 'double', 'kValueOffset'),
    ('ConsString', 'first', 'String', 'kFirstOffset'),
    ('ConsString', 'second', 'String', 'kSecondOffset'),
    ('ExternalString', 'resource', 'Object', 'kResourceOffset'),
    ('SeqAsciiString', 'chars', 'char', 'kHeaderSize'),
)))
+
+#
+# The following is a whitelist of classes we expect to find when scanning the
+# source code. This list is not exhaustive, but it's still useful to identify
+# when this script gets out of sync with the source. See load_objects().
+#
# Class names that must turn up while scanning the source; see
# load_objects(), which reports any that are missing.
expected_classes = (
    'ConsString FixedArray HeapNumber JSArray JSFunction '
    'JSObject JSRegExp JSValue Map Oddball Script '
    'SeqAsciiString SharedFunctionInfo'
).split()
+
+
+#
+# The following structures store high-level representations of the structures
+# for which we're going to emit descriptive constants.
+#
# Shared state: filled in by load_objects() / load_fields() and read back
# by emit_config().
types = dict()        # type names; used as a set (values are always True)
typeclasses = dict()  # type name -> name of the corresponding class
klasses = dict()      # class name -> {'parent': parent class name or None}
fields = list()       # constant descriptors built by parse_field()
+
# Text written at the top of the generated C++ file.  The single "%s" is
# filled with the name this script was invoked as.  Note that the
# backslash-newline inside the literal is consumed by Python, so the
# FRAME_CONST macro is emitted on a single line.
_header_template = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
 int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

'''
header = _header_template % (sys.argv[0],)

# Text written at the bottom of the generated file; closes the
# extern "C" block opened in the header.
footer = '\n}\n'
+
+#
+# Loads class hierarchy and type information from "objects.h".
+#
def _class_name_for_type(typename, known_classes):
    """Infer the class name that corresponds to an instance type name.

    The type name is transformed systematically: the "_TYPE" suffix is
    dropped and the remaining underscore-separated parts are converted to
    camel case, except that a leading "JS" stays uppercase (as in
    "JS_FUNCTION_TYPE" => "JSFunction").

    Args:
        typename: an enumerator name such as "JS_FUNCTION_TYPE".
        known_classes: container of class names, consulted only to decide
            whether a string class name needs its encoding stripped.

    Returns:
        The inferred class name (which is not guaranteed to be present in
        known_classes), or None if typename does not end in "_TYPE".
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    cctype = ''
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]

    # Slicing (rather than indexing) tolerates empty parts, which arise
    # from doubled or leading underscores.
    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    #
    # Mapping string types is more complicated.  Both types and class
    # names for Strings specify a representation (e.g., Seq, Cons,
    # External, or Sliced) and an encoding (TwoByte or Ascii).  In the
    # simplest case, both of these are explicit in both names, as in:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from
    # the type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # So we first add any missing representation and encoding; if that
    # doesn't yield a known class name, we strip out the encoding.
    #
    if cctype.endswith('String'):
        has_representation = ('Cons' in cctype or 'External' in cctype or
                              'Sliced' in cctype)
        if not has_representation:
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in known_classes:
            cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    Reads the file named by sys.argv[2] and fills in three module-level
    dictionaries:

      klasses      every class declaration found, mapped to
                   {'parent': <parent class name or None>}
      types        every enumerator of "enum InstanceType"
      typeclasses  each type name whose inferred class name is a key of
                   klasses, mapped to that class name

    If any name in expected_classes ends up without a matching type, an
    error is printed for each one and the script exits with status 1.
    """
    # Classes we're sure should be present; names are removed as found.
    missing = set(expected_classes)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate the text of the entire InstanceType enum
    # definition and parse it afterwards because it's easier to do so
    # without the embedded newlines.
    #
    enum_chunks = []
    in_insttype = False

    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                enum_chunks.append(line)
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration.  Names are stripped on both
    # sides (whitespace can be left behind where a trailing comment was
    # removed) and empty entries, e.g. after a trailing comma, are skipped.
    #
    for entry in ''.join(enum_chunks).split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # Infer class names for each type.  We find the class for each type
    # rather than the other way around because there are fewer cases where
    # one type maps to more than one class than the other way around.
    # Despite the inference, some types have no corresponding class; those
    # are simply left out of typeclasses.
    #
    for typename in types:
        cctype = _class_name_for_type(typename, klasses)
        if cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        for klass in sorted(missing):
            print('error: expected class \"%s\" not found' % klass)

        sys.exit(1)
+
+
+#
+# For a given macro call, pick apart the arguments and return an object
+# describing the corresponding output constant. See load_fields().
+#
def parse_field(call):
    """Turn one accessor macro invocation into an output constant.

    Args:
        call: the full text of the invocation, e.g.
            "ACCESSORS(Map, prototype, Object, kPrototypeOffset)".  It may
            contain embedded newlines and must end with the closing
            parenthesis.

    Returns:
        A dict with 'name' ("class_<class>__<field>__<type>") and 'value'
        ("<class>::<offset>").  For SMI_ACCESSORS, which has no type
        argument, the type is "SMI".

    Raises:
        ValueError: if the macro is not ACCESSORS, ACCESSORS_GCSAFE or
            SMI_ACCESSORS.
    """
    # An invocation may span several source lines; flatten it first.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[:idx]

    # Drop the opening "(" and the trailing ")", then split on commas.
    # Stripping keeps whitespace after "(" out of the first argument.
    args = re.split(r'\s*,\s*', call[idx + 1:-1].strip())

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    elif kind == 'SMI_ACCESSORS':
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'
    else:
        raise ValueError('unsupported accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset),
    }
+
+#
+# Load field offset information from objects-inl.h.
+#
def _scan_parens(line, start, opens):
    """Track parenthesis depth across line[start:].

    Args:
        line: the text to scan.
        start: index at which scanning begins.
        opens: number of parentheses already open before this text.

    Returns:
        A tuple (end, opens).  If the depth reaches zero, end is the index
        just past the character that closed it and opens is 0; otherwise
        end is len(line) and opens is the depth at the end of the line.
    """
    for idx in range(start, len(line)):
        if line[idx] == '(':
            opens += 1
        elif line[idx] == ')':
            opens -= 1

        if opens == 0:
            return idx + 1, 0

    return len(line), opens


def load_fields():
    """Load field offset information from objects-inl.h.

    Reads the file named by sys.argv[3].  Each class's fields and the
    corresponding offsets are described in the source by calls to macros
    like "ACCESSORS" (and friends) that start at the beginning of a line.
    Those invocations are extracted, taking into account that they may
    span multiple lines and may contain nested parentheses, and each is
    passed to parse_field(); the results are appended to the module-level
    "fields" list.  Finally one ACCESSORS entry is appended for every item
    of extras_accessors.
    """
    prefixes = ('ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS')
    current = ''    # text of the invocation collected so far
    opens = 0       # parentheses still open in "current"

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of a multi-line invocation.
                end, opens = _scan_parens(line, 0, opens)
                current += line[:end]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here; flush the previous one.
                if current:
                    fields.append(parse_field(current))

                end, opens = _scan_parens(line, len(prefix), 0)
                current = line[:end]
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
+
+#
+# Emit a block of constants.
+#
def emit_set(out, consts):
    """Emit a block of constants followed by a blank line.

    Args:
        out: object with a write() method that receives the output text.
        consts: iterable of dicts, each with a 'name' and a 'value' entry.
            One line "int v8dbg_<name> = <value>;" is written per entry,
            in iteration order.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')
+
+#
+# Emit the whole output file.
+#
def emit_config():
    """Emit the whole output file.

    Writes to the path given by sys.argv[1], in order: the header, the
    miscellaneous constants, one "type_<class>__<type>" constant per entry
    of typeclasses, one "parent_<class>__<parent>" constant (value 0) per
    class in klasses that has a parent, the field constants, and the
    footer.  Type and class names are emitted in sorted order.
    """
    # open() and sorted() work on both Python 2 and 3, unlike the file()
    # builtin and calling .sort() on the result of dict.keys().
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                continue

            # Only the symbol name carries information here, so the value
            # is a constant 0.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
+
# Script entry point.  Guarded so that importing this file does not parse
# the V8 sources or write the output file as a side effect.
if __name__ == '__main__':
    # Expected arguments: the output file, then objects.h and
    # objects-inl.h.
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()
« no previous file with comments | « build/common.gypi ('k') | tools/gyp/v8.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698