OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 | |
3 # | |
4 # Copyright 2006-2008 the V8 project authors. All rights reserved. | |
Vyacheslav Egorov (Chromium)
2012/01/17 18:14:58
2012
| |
5 # Redistribution and use in source and binary forms, with or without | |
6 # modification, are permitted provided that the following conditions are | |
7 # met: | |
8 # | |
9 # * Redistributions of source code must retain the above copyright | |
10 # notice, this list of conditions and the following disclaimer. | |
11 # * Redistributions in binary form must reproduce the above | |
12 # copyright notice, this list of conditions and the following | |
13 # disclaimer in the documentation and/or other materials provided | |
14 # with the distribution. | |
15 # * Neither the name of Google Inc. nor the names of its | |
16 # contributors may be used to endorse or promote products derived | |
17 # from this software without specific prior written permission. | |
18 # | |
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 # | |
31 | |
32 # | |
33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem | |
34 # debugging tools. Most importantly, this tool emits constants describing V8 | |
35 # internals: | |
36 # | |
37 # v8dbg_type_CLASS__TYPE = VALUE Describes class type values | |
38 # v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields | |
39 # v8dbg_parent_CLASS__PARENT Describes class hierarchy | |
40 # v8dbg_frametype_NAME = VALUE Describes stack frame values | |
41 # v8dbg_off_fp_NAME = OFFSET Frame pointer offsets | |
42 # v8dbg_prop_NAME = OFFSET Object property offsets | |
43 # v8dbg_NAME = VALUE Miscellaneous values | |
44 # | |
45 # These constants are declared as global integers so that they'll be present in | |
46 # the generated libv8 binary. | |
47 # | |
48 | |
49 import re | |
50 import sys | |
51 | |
52 # | |
53 # Miscellaneous constants, tags, and masks used for object identification. | |
54 # | |
# Each entry becomes one "int v8dbg_<name> = <value>;" line in the generated
# file (see emit_set()).  'value' is a C++ expression that is copied verbatim.
#
# NOTE(review): V. Egorov (2012/01/17) left a comment on the first entry; it
# is truncated in this copy: "I seems you can just list what you want to
# export".
consts_misc = [
    {'name': 'FirstNonstringType', 'value': 'FIRST_NONSTRING_TYPE'},

    {'name': 'IsNotStringMask', 'value': 'kIsNotStringMask'},
    {'name': 'StringTag', 'value': 'kStringTag'},
    {'name': 'NotStringTag', 'value': 'kNotStringTag'},

    {'name': 'StringEncodingMask', 'value': 'kStringEncodingMask'},
    {'name': 'TwoByteStringTag', 'value': 'kTwoByteStringTag'},
    {'name': 'AsciiStringTag', 'value': 'kAsciiStringTag'},

    {'name': 'StringRepresentationMask',
     'value': 'kStringRepresentationMask'},
    {'name': 'SeqStringTag', 'value': 'kSeqStringTag'},
    {'name': 'ConsStringTag', 'value': 'kConsStringTag'},
    {'name': 'ExternalStringTag', 'value': 'kExternalStringTag'},

    {'name': 'FailureTag', 'value': 'kFailureTag'},
    {'name': 'FailureTagMask', 'value': 'kFailureTagMask'},
    {'name': 'HeapObjectTag', 'value': 'kHeapObjectTag'},
    {'name': 'HeapObjectTagMask', 'value': 'kHeapObjectTagMask'},
    {'name': 'SmiTag', 'value': 'kSmiTag'},
    {'name': 'SmiTagMask', 'value': 'kSmiTagMask'},
    {'name': 'SmiValueShift', 'value': 'kSmiTagSize'},
    {'name': 'PointerSizeLog2', 'value': 'kPointerSizeLog2'},

    # Emitted as v8dbg_prop_* constants.
    {'name': 'prop_idx_content',
     'value': 'DescriptorArray::kContentArrayIndex'},
    {'name': 'prop_idx_first',
     'value': 'DescriptorArray::kFirstIndex'},
    {'name': 'prop_type_field',
     'value': 'FIELD'},
    {'name': 'prop_type_first_phantom',
     'value': 'MAP_TRANSITION'},
    {'name': 'prop_type_mask',
     'value': 'PropertyDetails::TypeField::kMask'},

    # Emitted as v8dbg_off_fp_* constants.
    {'name': 'off_fp_context',
     'value': 'StandardFrameConstants::kContextOffset'},
    {'name': 'off_fp_marker',
     'value': 'StandardFrameConstants::kMarkerOffset'},
    {'name': 'off_fp_function',
     'value': 'JavaScriptFrameConstants::kFunctionOffset'},
    {'name': 'off_fp_args',
     'value': 'JavaScriptFrameConstants::kLastParameterOffset'},
]
101 | |
102 # | |
103 # The following useful fields are missing accessors, so we define fake ones. | |
104 # | |
# Each string is the argument list of a synthetic ACCESSORS(...) invocation:
# "Class, field, type, offset-constant".  load_fields() wraps each one in
# 'ACCESSORS(%s)' and hands it to parse_field().
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqAsciiString, chars, char, kHeaderSize',
]
119 | |
120 # | |
121 # The following structures store high-level representations of the structures | |
122 # for which we're going to emit descriptive constants. | |
123 # | |
# Module-level state filled in by load_objects() / load_fields() and read by
# emit_config().
types = {}        # all instance type names, used as a set (values are True)
typeclasses = {}  # maps type names to corresponding class names
klasses = {}      # known classes, including parents: name -> {'parent': ...}
fields = []       # field declarations, as {'name': ..., 'value': ...} dicts
128 | |
# Text written at the top of the generated C++ file; %s is replaced with the
# name of this script (sys.argv[0]).
#
# The template is a non-raw string, so the backslash-newline after
# FRAME_CONST(value, klass) is consumed by Python and the macro definition
# ends up on a single line in the output.
#
# NOTE(review): whitespace inside the template was collapsed in the copy this
# was restored from; confirm indentation against the original file.
header = '''
/*
* This file is generated by %s. Do not edit directly.
*/

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

# Text written at the end of the generated file: closes the extern "C" block.
footer = '''
}
'''
155 | |
156 # | |
157 # Loads class hierarchy and type information from "objects.h". | |
158 # | |
def _class_name_for_type(type_name, known_classes):
    """Infer the class name that implements instance type `type_name`.

    The name is derived by a systematic transformation, e.g.
    "JS_FUNCTION_TYPE" becomes "JSFunction".  We find the class for each type
    rather than the other way around because there are fewer cases where one
    type maps to more than one class than the other way around.

    Returns the class name if the inferred name is a key of `known_classes`,
    otherwise None (some types have no corresponding class).

    NOTE(review): V. Egorov (2012/01/17) commented on this inference; both
    comments are truncated in this copy: "Did you consider using TYPE_CHECKER
    macro as a fee" and "I think it might be fine to hard wire some instanc".
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = re.sub('SYMBOL_', 'STRING_', type_name)

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = re.sub('_REGEXP_', '_REG_EXP_', usetype)

    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").
    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    # Slicing (rather than part[0]) keeps an empty segment from raising.
    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii).  In the simplest case,
    # both of these are explicit in both names, as in:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from
    # the type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present.  If that doesn't
    # yield a valid class name, then we strip out the encoding.
    if cctype.endswith('String'):
        if ('Cons' not in cctype and
                'External' not in cctype and
                'Sliced' not in cctype):
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in known_classes:
            cctype = re.sub('Ascii', '', cctype)
            cctype = re.sub('TwoByte', '', cctype)

    # Despite all that, some types have no corresponding class.
    if cctype in known_classes:
        return cctype
    return None


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    Reads the file named by sys.argv[2] and fills in the module-level
    tables:

      klasses      class name -> {'parent': parent class name or None}
      types        every name found in the InstanceType enum (used as a set)
      typeclasses  type name -> inferred class name, for types whose
                   inferred class is present in klasses
    """
    in_insttype = False
    enum_chunks = []

    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate the text of the entire InstanceType enum
    # definition and parse it later because it's easier to do so without
    # the embedded newlines.
    with open(sys.argv[2], 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Drop surrounding whitespace and any trailing "//" comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                enum_chunks.append(line)
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)
            if match:
                # group(3) is None when the class declares no public parent.
                klasses[match.group(1)] = {'parent': match.group(3)}

    # Process the instance type declaration: one enumerator per comma, with
    # any "= value" initializer removed.
    for entry in ''.join(enum_chunks).split(','):
        name = re.sub(r'\s*=.*', '', entry).lstrip()
        if name:
            # A trailing comma in the enum yields an empty entry; skip it.
            types[name] = True

    for type_name in types:
        cctype = _class_name_for_type(type_name, klasses)
        if cctype is not None:
            typeclasses[type_name] = cctype
Vyacheslav Egorov (Chromium)
2012/01/17 18:14:58
Maybe tools should have a whitelist and complain t
| |
292 | |
293 | |
294 # | |
295 # For a given macro call, pick apart the arguments and return an object | |
296 # describing the corresponding output constant. See load_fields(). | |
297 # | |
def parse_field(call):
    """Pick apart one accessor macro invocation.

    `call` is the text of a single invocation, from the macro name through
    the closing parenthesis, e.g. "ACCESSORS(Map, foo, Object, kFooOffset)".
    It may contain embedded newlines when the invocation spanned several
    source lines.

    Returns a dict describing the output constant:

      ACCESSORS / ACCESSORS_GCSAFE (klass, field, dtype, offset):
          {'name': 'class_<klass>__<field>__<dtype>',
           'value': '<klass>::<offset>'}
      SMI_ACCESSORS (klass, field, offset):
          {'name': 'class_<klass>__<field>__SMI',
           'value': '<klass>::<offset>'}

    Any other macro name trips the assertion below.
    """
    # Replace newlines with spaces.  (Strings are immutable, so this has to
    # build a new string rather than assign to individual characters.)
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]
    rest = call[idx + 1:len(call) - 1]

    # Strip padding around the whole list so that a line break right after
    # "(" or right before ")" does not leak into the first/last argument.
    args = re.split(r'\s*,\s*', rest.strip())

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        klass = args[0]
        field = args[1]
        dtype = args[2]
        offset = args[3]

        return {
            'name': 'class_%s__%s__%s' % (klass, field, dtype),
            'value': '%s::%s' % (klass, offset),
        }

    assert kind == 'SMI_ACCESSORS'
    klass = args[0]
    field = args[1]
    offset = args[2]

    return {
        'name': 'class_%s__%s__%s' % (klass, field, 'SMI'),
        'value': '%s::%s' % (klass, offset),
    }
331 | |
332 # | |
333 # Load field offset information from objects-inl.h. | |
334 # | |
def _scan_parens(line, start, depth):
    """Scan line[start:] tracking parenthesis nesting.

    Stops just after the character that brings the nesting depth back to
    zero, or at the end of the line.  Returns (depth, end), where `end` is
    the index one past the last character consumed.
    """
    end = start
    while end < len(line):
        ch = line[end]
        end += 1
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1

        if depth == 0:
            break

    return depth, end


def load_fields():
    """Load field offset information from objects-inl.h.

    Reads the file named by sys.argv[3].  Each class's fields and the
    corresponding offsets are described in the source by calls to macros
    like "ACCESSORS" (and friends).  All we do here is extract these macro
    invocations, taking into account that they may span multiple lines and
    may contain nested parentheses, and call parse_field() on each one.  The
    results are appended to the module-level `fields` list, followed by one
    entry per string in `extras_accessors`.
    """
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS']
    current = ''
    opens = 0

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of an invocation that is still open.
                opens, end = _scan_parens(line, 0, opens)
                current += line[0:end]
            else:
                for prefix in prefixes:
                    # Requiring the "(" keeps 'ACCESSORS' from matching
                    # 'ACCESSORS_GCSAFE(...)'.
                    if line.startswith(prefix + '('):
                        opens, end = _scan_parens(line, len(prefix), opens)
                        current += line[0:end]
                        break

            # Hand the invocation over as soon as its parentheses balance.
            if opens == 0 and current:
                fields.append(parse_field(current))
                current = ''

    # An invocation left unterminated at end of file is still passed on.
    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
390 | |
391 # | |
392 # Emit a block of constants. | |
393 # | |
def emit_set(out, consts):
    """Emit a block of constants.

    Writes one "int v8dbg_<name> = <value>;" line to `out` for each dict in
    `consts` (each must have 'name' and 'value' keys), followed by a single
    blank line.  `out` only needs a write() method.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')
399 | |
400 # | |
401 # Emit the whole output file. | |
402 # | |
def emit_config():
    """Emit the whole output file.

    Writes to the path given by sys.argv[1], in order: the header, the
    miscellaneous constants, one "type_<class>__<type>" constant per entry
    in `typeclasses`, one "parent_<class>__<parent>" constant (value 0) per
    class in `klasses` that has a parent, the field constants collected in
    `fields`, and the footer.  Type and class entries are emitted in sorted
    name order.
    """
    # open() + "with" replaces the Python-2-only file() builtin and makes
    # sure the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        emit_set(out, [
            {
                'name': 'type_%s__%s' % (typeclasses[typename], typename),
                'value': typename,
            }
            for typename in sorted(typeclasses)
        ])

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                continue

            # The relationship is carried by the constant's name; the value
            # is always 0.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0,
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
444 | |
# Script entry: expects the output path followed by the two V8 headers.
if len(sys.argv) < 4:
    print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
    sys.exit(2)

load_objects()
load_fields()
emit_config()
OLD | NEW |