OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # | |
3 # Copyright 2007 Neal Norwitz | |
4 # Portions Copyright 2007 Google Inc. | |
5 # | |
6 # Licensed under the Apache License, Version 2.0 (the "License"); | |
7 # you may not use this file except in compliance with the License. | |
8 # You may obtain a copy of the License at | |
9 # | |
10 # http://www.apache.org/licenses/LICENSE-2.0 | |
11 # | |
12 # Unless required by applicable law or agreed to in writing, software | |
13 # distributed under the License is distributed on an "AS IS" BASIS, | |
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
15 # See the License for the specific language governing permissions and | |
16 # limitations under the License. | |
17 | |
18 """Generate an Abstract Syntax Tree (AST) for C++.""" | |
19 | |
20 __author__ = 'nnorwitz@google.com (Neal Norwitz)' | |
21 | |
22 | |
23 # TODO: | |
24 # * Tokens should never be exported, need to convert to Nodes | |
25 # (return types, parameters, etc.) | |
26 # * Handle static class data for templatized classes | |
27 # * Handle casts (both C++ and C-style) | |
28 # * Handle conditions and loops (if/else, switch, for, while/do) | |
29 # | |
30 # TODO much, much later: | |
31 # * Handle #define | |
32 # * exceptions | |
33 | |
34 | |
35 try: | |
36 # Python 3.x | |
37 import builtins | |
38 except ImportError: | |
39 # Python 2.x | |
40 import __builtin__ as builtins | |
41 | |
42 import sys | |
43 import traceback | |
44 | |
45 from cpp import keywords | |
46 from cpp import tokenize | |
47 from cpp import utils | |
48 | |
49 | |
if not hasattr(builtins, 'reversed'):
    # Support Python 2.3 and earlier.
    def reversed(seq):
        """Yield elements of *seq* last-to-first (stand-in for the 2.4+ builtin)."""
        for i in range(len(seq)-1, -1, -1):
            yield seq[i]

if not hasattr(builtins, 'next'):
    # Support Python 2.5 and earlier.
    def next(obj):
        """Advance iterator *obj* (stand-in for the 2.6+ builtin next())."""
        return obj.next()
60 | |
61 | |
# Access-specifier constants for class members.
VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)

# Bit flags OR'd together into the `modifiers` bitmask of Function/Method.
FUNCTION_NONE = 0x00
FUNCTION_CONST = 0x01
FUNCTION_VIRTUAL = 0x02
FUNCTION_PURE_VIRTUAL = 0x04
FUNCTION_CTOR = 0x08
FUNCTION_DTOR = 0x10
FUNCTION_ATTRIBUTE = 0x20
FUNCTION_UNKNOWN_ANNOTATION = 0x40
FUNCTION_THROW = 0x80

"""
These are currently unused. Should really handle these properly at some point.

TYPE_MODIFIER_INLINE = 0x010000
TYPE_MODIFIER_EXTERN = 0x020000
TYPE_MODIFIER_STATIC = 0x040000
TYPE_MODIFIER_CONST = 0x080000
TYPE_MODIFIER_REGISTER = 0x100000
TYPE_MODIFIER_VOLATILE = 0x200000
TYPE_MODIFIER_MUTABLE = 0x400000

TYPE_MODIFIER_MAP = {
    'inline': TYPE_MODIFIER_INLINE,
    'extern': TYPE_MODIFIER_EXTERN,
    'static': TYPE_MODIFIER_STATIC,
    'const': TYPE_MODIFIER_CONST,
    'register': TYPE_MODIFIER_REGISTER,
    'volatile': TYPE_MODIFIER_VOLATILE,
    'mutable': TYPE_MODIFIER_MUTABLE,
}
"""

# Sentinel token type/name used internally so Generate() knows when a
# namespace scope closes (see _NAMESPACE_POP handling in AstBuilder.Generate).
_INTERNAL_TOKEN = 'internal'
_NAMESPACE_POP = 'ns-pop'
98 | |
99 | |
100 # TODO(nnorwitz): use this as a singleton for templated_types, etc | |
101 # where we don't want to create a new empty dict each time. It is also const. | |
102 class _NullDict(object): | |
103 __contains__ = lambda self: False | |
104 keys = values = items = iterkeys = itervalues = iteritems = lambda self: () | |
105 | |
106 | |
107 # TODO(nnorwitz): move AST nodes into a separate module. | |
class Node(object):
    """Base AST node."""

    def __init__(self, start, end):
        # start/end delimit this node's extent in the original source
        # (token positions as produced by the tokenizer).
        self.start = start
        self.end = end

    def IsDeclaration(self):
        """Returns bool if this node is a declaration."""
        return False

    def IsDefinition(self):
        """Returns bool if this node is a definition."""
        return False

    def IsExportable(self):
        """Returns bool if this node exportable from a header file."""
        return False

    def Requires(self, node):
        """Does this AST node require the definition of the node passed in?"""
        return False

    def XXX__str__(self):
        # Deliberately disabled __str__ (XXX prefix keeps it from being a
        # dunder); subclasses define their own __str__ instead.
        return self._StringHelper(self.__class__.__name__, '')

    def _StringHelper(self, name, suffix):
        # Include start/end positions only in debug mode to keep output compact.
        if not utils.DEBUG:
            return '%s(%s)' % (name, suffix)
        return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)

    def __repr__(self):
        return str(self)
141 | |
142 | |
class Define(Node):
    """AST node for a preprocessor #define directive."""

    def __init__(self, start, end, name, definition):
        Node.__init__(self, start, end)
        self.name = name
        self.definition = definition

    def __str__(self):
        return self._StringHelper(self.__class__.__name__,
                                  '%s %s' % (self.name, self.definition))
152 | |
153 | |
class Include(Node):
    """AST node for a preprocessor #include directive."""

    def __init__(self, start, end, filename, system):
        Node.__init__(self, start, end)
        self.filename = filename
        # True for system headers (<...>), False for local ("...") includes.
        self.system = system

    def __str__(self):
        if self.system:
            wrapper = '<%s>'
        else:
            wrapper = '"%s"'
        return self._StringHelper(self.__class__.__name__,
                                  wrapper % self.filename)
165 | |
166 | |
class Goto(Node):
    """AST node for a goto statement; label names the jump target."""

    def __init__(self, start, end, label):
        Node.__init__(self, start, end)
        self.label = label

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.label))
174 | |
175 | |
class Expr(Node):
    """AST node for a generic expression, kept as a raw token sequence."""

    def __init__(self, start, end, expr):
        Node.__init__(self, start, end)
        # expr is the list of tokens that make up the expression.
        self.expr = expr

    def Requires(self, node):
        # TODO(nnorwitz): impl.
        return False

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.expr))
187 | |
188 | |
class Return(Expr):
    """AST node for a return statement; the Expr is the returned value."""
    pass
191 | |
192 | |
class Delete(Expr):
    """AST node for a delete statement; the Expr is the deleted operand."""
    pass
195 | |
196 | |
class Friend(Expr):
    """AST node for a friend declaration inside a class."""

    def __init__(self, start, end, expr, namespace):
        Expr.__init__(self, start, end, expr)
        # Copy the namespace stack: the caller keeps mutating it while parsing.
        self.namespace = namespace[:]
201 | |
202 | |
class Using(Node):
    """AST node for a using declaration/directive; names is a token list."""

    def __init__(self, start, end, names):
        Node.__init__(self, start, end)
        self.names = names

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, str(self.names))
210 | |
211 | |
class Parameter(Node):
    """AST node for a single function/method parameter."""

    def __init__(self, start, end, name, parameter_type, default):
        Node.__init__(self, start, end)
        self.name = name
        # Type node describing the parameter's type.
        self.type = parameter_type
        # List of tokens forming the default value (empty if none).
        self.default = default

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def __str__(self):
        name = str(self.type)
        suffix = '%s %s' % (name, self.name)
        if self.default:
            suffix += ' = ' + ''.join([d.name for d in self.default])
        return self._StringHelper(self.__class__.__name__, suffix)
229 | |
230 | |
class _GenericDeclaration(Node):
    """Base for declarations that carry a name and an enclosing namespace."""

    def __init__(self, start, end, name, namespace):
        Node.__init__(self, start, end)
        self.name = name
        # Copy the namespace stack: the caller keeps mutating it while parsing.
        self.namespace = namespace[:]

    def FullName(self):
        """Return the name qualified with its namespace (e.g. A::B::name)."""
        prefix = ''
        if self.namespace and self.namespace[-1]:
            prefix = '::'.join(self.namespace) + '::'
        return prefix + self.name

    def _TypeStringHelper(self, suffix):
        # Append namespace info to the standard string form when present;
        # None entries represent anonymous namespaces.
        if self.namespace:
            names = [n or '<anonymous>' for n in self.namespace]
            suffix += ' in ' + '::'.join(names)
        return self._StringHelper(self.__class__.__name__, suffix)
248 | |
249 | |
# TODO(nnorwitz): merge with Parameter in some way?
class VariableDeclaration(_GenericDeclaration):
    """AST node for a variable declaration with an optional initializer."""

    def __init__(self, start, end, name, var_type, initial_value, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # Type node describing the variable's type.
        self.type = var_type
        # String form of the initializer ('' or None when absent).
        self.initial_value = initial_value

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        return self.type.name == node.name

    def ToString(self):
        """Return a string that tries to reconstitute the variable decl."""
        suffix = '%s %s' % (self.type, self.name)
        if self.initial_value:
            suffix += ' = ' + self.initial_value
        return suffix

    def __str__(self):
        return self._StringHelper(self.__class__.__name__, self.ToString())
270 | |
271 | |
class Typedef(_GenericDeclaration):
    """AST node for a typedef; alias is the aliased token sequence."""

    def __init__(self, start, end, name, alias, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        self.alias = alias

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        name = node.name
        # The alias sequence may contain None entries; skip them.
        for token in self.alias:
            if token is not None and name == token.name:
                return True
        return False

    def __str__(self):
        suffix = '%s, %s' % (self.name, self.alias)
        return self._TypeStringHelper(suffix)
294 | |
295 | |
class _NestedType(_GenericDeclaration):
    """Base for brace-enclosed nested types (unions and enums)."""

    def __init__(self, start, end, name, fields, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # fields holds the members found between the braces.
        self.fields = fields

    def IsDefinition(self):
        return True

    def IsExportable(self):
        return True

    def __str__(self):
        suffix = '%s, {%s}' % (self.name, self.fields)
        return self._TypeStringHelper(suffix)
310 | |
311 | |
class Union(_NestedType):
    """AST node for a union."""
    pass
314 | |
315 | |
class Enum(_NestedType):
    """AST node for an enum."""
    pass
318 | |
319 | |
class Class(_GenericDeclaration):
    """AST node for a class declaration or definition."""

    def __init__(self, start, end, name, bases, templated_types, body,
                 namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # bases: None for a forward declaration, else a list of token lists
        # (one token list per base class).
        self.bases = bases
        # body: None for a forward declaration, else the parsed members.
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        # A forward declaration has neither bases nor a body.
        return self.bases is None and self.body is None

    def IsDefinition(self):
        return not self.IsDeclaration()

    def IsExportable(self):
        return not self.IsDeclaration()

    def Requires(self, node):
        # TODO(nnorwitz): handle namespaces, etc.
        if self.bases:
            for token_list in self.bases:
                # TODO(nnorwitz): bases are tokens, do name comparison.
                for token in token_list:
                    if token.name == node.name:
                        return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        name = self.name
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = '%s, %s, %s' % (name, self.bases, self.body)
        return self._TypeStringHelper(suffix)
353 | |
354 | |
class Struct(Class):
    """AST node for a struct; identical to Class apart from the keyword."""
    pass
357 | |
358 | |
class Function(_GenericDeclaration):
    """AST node for a function declaration or definition."""

    def __init__(self, start, end, name, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        _GenericDeclaration.__init__(self, start, end, name, namespace)
        # Normalize the raw token sequences into AST nodes up front.
        converter = TypeConverter(namespace)
        self.return_type = converter.CreateReturnType(return_type)
        self.parameters = converter.ToParameters(parameters)
        # modifiers is a bitmask of the FUNCTION_* flags.
        self.modifiers = modifiers
        self.body = body
        self.templated_types = templated_types

    def IsDeclaration(self):
        return self.body is None

    def IsDefinition(self):
        return self.body is not None

    def IsExportable(self):
        # static functions are local to their translation unit.
        if self.return_type and 'static' in self.return_type.modifiers:
            return False
        # None namespace entries mark anonymous namespaces: not exportable.
        return None not in self.namespace

    def Requires(self, node):
        if self.parameters:
            # TODO(nnorwitz): parameters are tokens, do name comparison.
            for p in self.parameters:
                if p.name == node.name:
                    return True
        # TODO(nnorwitz): search in body too.
        return False

    def __str__(self):
        # TODO(nnorwitz): add templated_types.
        suffix = ('%s %s(%s), 0x%02x, %s' %
                  (self.return_type, self.name, self.parameters,
                   self.modifiers, self.body))
        return self._TypeStringHelper(suffix)
396 | |
397 | |
class Method(Function):
    """AST node for a class method; adds the owning class name."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        Function.__init__(self, start, end, name, return_type, parameters,
                          modifiers, templated_types, body, namespace)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
406 | |
407 | |
class Type(_GenericDeclaration):
    """Type used for any variable (eg class, primitive, struct, etc)."""

    def __init__(self, start, end, name, templated_types, modifiers,
                 reference, pointer, array):
        """
        Args:
          name: str name of main type
          templated_types: [Class (Type?)] template type info between <>
          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
          reference, pointer, array: bools
        """
        # Types carry no namespace of their own (empty list).
        _GenericDeclaration.__init__(self, start, end, name, [])
        self.templated_types = templated_types
        # A type spelled only with keywords (e.g. "unsigned") has no name
        # token; promote the last modifier to act as the name.
        if not name and modifiers:
            self.name = modifiers.pop()
        self.modifiers = modifiers
        self.reference = reference
        self.pointer = pointer
        self.array = array

    def __str__(self):
        prefix = ''
        if self.modifiers:
            prefix = ' '.join(self.modifiers) + ' '
        name = str(self.name)
        if self.templated_types:
            name += '<%s>' % self.templated_types
        suffix = prefix + name
        if self.reference:
            suffix += '&'
        if self.pointer:
            suffix += '*'
        if self.array:
            suffix += '[]'
        return self._TypeStringHelper(suffix)

    # By definition, Is* are always False.  A Type can only exist in
    # some sort of variable declaration, parameter, or return value.
    def IsDeclaration(self):
        return False

    def IsDefinition(self):
        return False

    def IsExportable(self):
        return False
455 | |
456 | |
class TypeConverter(object):
    """Converts raw token sequences into typed AST nodes (Type, Parameter).

    Shared by AstBuilder and Function; holds the namespace stack so nodes
    can be created in the right scope.
    """

    def __init__(self, namespace_stack):
        self.namespace_stack = namespace_stack

    def _GetTemplateEnd(self, tokens, start):
        """Find the '>' matching the '<' just before tokens[start].

        Handles nested templates.  Returns (tokens strictly between the
        brackets, index just past the closing '>').
        """
        count = 1
        end = start
        while 1:
            token = tokens[end]
            end += 1
            if token.name == '<':
                count += 1
            elif token.name == '>':
                count -= 1
                if count == 0:
                    break
        return tokens[start:end-1], end

    def ToType(self, tokens):
        """Convert [Token,...] to [Class(...), ] useful for base classes.
        For example, code like class Foo : public Bar<x, y> { ... };
        the "Bar<x, y>" portion gets converted to an AST.

        Returns:
            [Class(...), ...]
        """
        result = []
        name_tokens = []
        reference = pointer = array = False

        def AddType(templated_types):
            # Partition tokens into name and modifier tokens.
            names = []
            modifiers = []
            for t in name_tokens:
                if keywords.IsKeyword(t.name):
                    modifiers.append(t.name)
                else:
                    names.append(t.name)
            name = ''.join(names)
            result.append(Type(name_tokens[0].start, name_tokens[-1].end,
                               name, templated_types, modifiers,
                               reference, pointer, array))
            del name_tokens[:]

        i = 0
        end = len(tokens)
        while i < end:
            token = tokens[i]
            if token.name == '<':
                new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
                AddType(self.ToType(new_tokens))
                # If there is a comma after the template, we need to consume
                # that here otherwise it becomes part of the name.
                i = new_end
                reference = pointer = array = False
            elif token.name == ',':
                AddType([])
                reference = pointer = array = False
            elif token.name == '*':
                pointer = True
            elif token.name == '&':
                reference = True
            elif token.name == '[':
                pointer = True
            elif token.name == ']':
                pass
            else:
                name_tokens.append(token)
            i += 1

        if name_tokens:
            # No '<' in the tokens, just a simple name and no template.
            AddType([])
        return result

    def DeclarationToParts(self, parts, needs_name_removed):
        """Split declaration tokens into their constituent pieces.

        Args:
            parts: [Token] of the whole declaration.
            needs_name_removed: True when the declared name is embedded in
                parts (e.g. a parameter) and must be extracted.

        Returns:
            (name, type_name, templated_types, modifiers, default,
             other_tokens)
        """
        name = None
        default = []
        if needs_name_removed:
            # Handle default (initial) values properly.
            for i, t in enumerate(parts):
                if t.name == '=':
                    default = parts[i+1:]
                    name = parts[i-1].name
                    # Array declarations: the name precedes the brackets.
                    if name == ']' and parts[i-2].name == '[':
                        name = parts[i-3].name
                        i -= 1
                    parts = parts[:i-1]
                    break
            else:
                if parts[-1].token_type == tokenize.NAME:
                    name = parts.pop().name
                else:
                    # TODO(nnorwitz): this is a hack that happens for code like
                    # Register(Foo<T>); where it thinks this is a function call
                    # but it's actually a declaration.
                    name = '???'
        modifiers = []
        type_name = []
        other_tokens = []
        templated_types = []
        i = 0
        end = len(parts)
        while i < end:
            p = parts[i]
            if keywords.IsKeyword(p.name):
                modifiers.append(p.name)
            elif p.name == '<':
                templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
                templated_types = self.ToType(templated_tokens)
                i = new_end - 1
                # Don't add a spurious :: to data members being initialized.
                next_index = i + 1
                if next_index < end and parts[next_index].name == '::':
                    i += 1
            elif p.name in ('[', ']', '='):
                # These are handled elsewhere.
                other_tokens.append(p)
            elif p.name not in ('*', '&', '>'):
                # Ensure that names have a space between them.
                if (type_name and type_name[-1].token_type == tokenize.NAME and
                    p.token_type == tokenize.NAME):
                    type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
                type_name.append(p)
            else:
                other_tokens.append(p)
            i += 1
        type_name = ''.join([t.name for t in type_name])
        return name, type_name, templated_types, modifiers, default, other_tokens

    def ToParameters(self, tokens):
        """Convert a flat parameter-list token sequence to [Parameter]."""
        if not tokens:
            return []

        result = []
        name = type_name = ''
        type_modifiers = []
        pointer = reference = array = False
        first_token = None
        default = []

        def AddParameter():
            # Flush the accumulated state into one Parameter node.
            if default:
                del default[0]  # Remove flag.
            end = type_modifiers[-1].end
            parts = self.DeclarationToParts(type_modifiers, True)
            (name, type_name, templated_types, modifiers,
             unused_default, unused_other_tokens) = parts
            parameter_type = Type(first_token.start, first_token.end,
                                  type_name, templated_types, modifiers,
                                  reference, pointer, array)
            p = Parameter(first_token.start, end, name,
                          parameter_type, default)
            result.append(p)

        template_count = 0
        for s in tokens:
            if not first_token:
                first_token = s
            # Track template nesting so commas inside <> don't split params.
            if s.name == '<':
                template_count += 1
            elif s.name == '>':
                template_count -= 1
            if template_count > 0:
                type_modifiers.append(s)
                continue

            if s.name == ',':
                AddParameter()
                name = type_name = ''
                type_modifiers = []
                pointer = reference = array = False
                first_token = None
                default = []
            elif s.name == '*':
                pointer = True
            elif s.name == '&':
                reference = True
            elif s.name == '[':
                array = True
            elif s.name == ']':
                pass  # Just don't add to type_modifiers.
            elif s.name == '=':
                # Got a default value.  Add any value (None) as a flag.
                default.append(None)
            elif default:
                default.append(s)
            else:
                type_modifiers.append(s)
        AddParameter()
        return result

    def CreateReturnType(self, return_type_seq):
        """Build a Type node from the return-type tokens (None if empty)."""
        if not return_type_seq:
            return None
        start = return_type_seq[0].start
        end = return_type_seq[-1].end
        _, name, templated_types, modifiers, default, other_tokens = \
            self.DeclarationToParts(return_type_seq, False)
        names = [n.name for n in other_tokens]
        reference = '&' in names
        pointer = '*' in names
        array = '[' in names
        return Type(start, end, name, templated_types, modifiers,
                    reference, pointer, array)

    def GetTemplateIndices(self, names):
        """Return (start, end) slice indices spanning '<...>' in names."""
        # names is a list of strings.
        start = names.index('<')
        end = len(names) - 1
        while end > 0:
            if names[end] == '>':
                break
            end -= 1
        return start, end+1
674 | |
675 class AstBuilder(object): | |
676 def __init__(self, token_stream, filename, in_class='', visibility=None, | |
677 namespace_stack=[]): | |
678 self.tokens = token_stream | |
679 self.filename = filename | |
680 # TODO(nnorwitz): use a better data structure (deque) for the queue. | |
681 # Switching directions of the "queue" improved perf by about 25%. | |
682 # Using a deque should be even better since we access from both sides. | |
683 self.token_queue = [] | |
684 self.namespace_stack = namespace_stack[:] | |
685 self.in_class = in_class | |
686 if in_class is None: | |
687 self.in_class_name_only = None | |
688 else: | |
689 self.in_class_name_only = in_class.split('::')[-1] | |
690 self.visibility = visibility | |
691 self.in_function = False | |
692 self.current_token = None | |
693 # Keep the state whether we are currently handling a typedef or not. | |
694 self._handling_typedef = False | |
695 | |
696 self.converter = TypeConverter(self.namespace_stack) | |
697 | |
698 def HandleError(self, msg, token): | |
699 printable_queue = list(reversed(self.token_queue[-20:])) | |
700 sys.stderr.write('Got %s in %s @ %s %s\n' % | |
701 (msg, self.filename, token, printable_queue)) | |
702 | |
    def Generate(self):
        """Yield top-level AST nodes until the token stream is exhausted.

        Internal bookkeeping tokens (namespace pops) are consumed here and
        never yielded to the caller.
        """
        while 1:
            token = self._GetNextToken()
            if not token:
                break

            # Get the next token.
            self.current_token = token

            # Dispatch on the next token type.
            if token.token_type == _INTERNAL_TOKEN:
                if token.name == _NAMESPACE_POP:
                    self.namespace_stack.pop()
                continue

            try:
                result = self._GenerateOne(token)
                if result is not None:
                    yield result
            except:
                # NOTE(review): deliberately broad -- annotate the failure
                # with file/token context, then re-raise unchanged.
                self.HandleError('exception', token)
                raise
725 | |
    def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                        ref_pointer_name_seq, templated_types, value=None):
        """Build a VariableDeclaration from pre-split declaration parts."""
        # Scan the raw name sequence for reference/pointer/array markers.
        reference = '&' in ref_pointer_name_seq
        pointer = '*' in ref_pointer_name_seq
        array = '[' in ref_pointer_name_seq
        var_type = Type(pos_token.start, pos_token.end, type_name,
                        templated_types, type_modifiers,
                        reference, pointer, array)
        return VariableDeclaration(pos_token.start, pos_token.end,
                                   name, var_type, value, self.namespace_stack)
736 | |
    def _GenerateOne(self, token):
        """Parse one top-level construct starting at *token*.

        Returns an AST node, or None when the construct produces nothing
        (e.g. unhandled syntax, skipped #if 0 block).
        """
        if token.token_type == tokenize.NAME:
            if (keywords.IsKeyword(token.name) and
                not keywords.IsBuiltinType(token.name)):
                # Keywords dispatch to handle_<keyword> methods.
                method = getattr(self, 'handle_' + token.name)
                return method()
            elif token.name == self.in_class_name_only:
                # The token name is the same as the class, must be a ctor if
                # there is a paren.  Otherwise, it's the return type.
                # Peek ahead to get the next token to figure out which.
                # NOTE(review): 'next' shadows the builtin here; harmless in
                # this scope but confusing.
                next = self._GetNextToken()
                self._AddBackToken(next)
                if next.token_type == tokenize.SYNTAX and next.name == '(':
                    return self._GetMethod([token], FUNCTION_CTOR, None, True)
                # Fall through--handle like any other method.

            # Handle data or function declaration/definition.
            syntax = tokenize.SYNTAX
            temp_tokens, last_token = \
                self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
            temp_tokens.insert(0, token)
            if last_token.name == '(':
                # If there is an assignment before the paren,
                # this is an expression, not a method.
                expr = bool([e for e in temp_tokens if e.name == '='])
                if expr:
                    new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.append(last_token)
                    temp_tokens.extend(new_temp)
                    last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)

            if last_token.name == '[':
                # Handle array, this isn't a method, unless it's an operator.
                # TODO(nnorwitz): keep the size somewhere.
                # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
                temp_tokens.append(last_token)
                if temp_tokens[-2].name == 'operator':
                    temp_tokens.append(self._GetNextToken())
                else:
                    temp_tokens2, last_token = \
                        self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
                    temp_tokens.extend(temp_tokens2)

            if last_token.name == ';':
                # Handle data, this isn't a method.
                parts = self.converter.DeclarationToParts(temp_tokens, True)
                (name, type_name, templated_types, modifiers, default,
                 unused_other_tokens) = parts

                t0 = temp_tokens[0]
                names = [t.name for t in temp_tokens]
                if templated_types:
                    # Strip the <...> span from the raw name sequence.
                    start, end = self.converter.GetTemplateIndices(names)
                    names = names[:start] + names[end:]
                default = ''.join([t.name for t in default])
                return self._CreateVariable(t0, name, type_name, modifiers,
                                            names, templated_types, default)
            if last_token.name == '{':
                self._AddBackTokens(temp_tokens[1:])
                self._AddBackToken(last_token)
                method_name = temp_tokens[0].name
                method = getattr(self, 'handle_' + method_name, None)
                if not method:
                    # Must be declaring a variable.
                    # TODO(nnorwitz): handle the declaration.
                    return None
                return method()
            return self._GetMethod(temp_tokens, 0, None, False)
        elif token.token_type == tokenize.SYNTAX:
            if token.name == '~' and self.in_class:
                # Must be a dtor (probably not in method body).
                token = self._GetNextToken()
                # self.in_class can contain A::Name, but the dtor will only
                # be Name.  Make sure to compare against the right value.
                if (token.token_type == tokenize.NAME and
                    token.name == self.in_class_name_only):
                    return self._GetMethod([token], FUNCTION_DTOR, None, True)
            # TODO(nnorwitz): handle a lot more syntax.
        elif token.token_type == tokenize.PREPROCESSOR:
            # TODO(nnorwitz): handle more preprocessor directives.
            # token starts with a #, so remove it and strip whitespace.
            name = token.name[1:].lstrip()
            if name.startswith('include'):
                # Remove "include".
                name = name[7:].strip()
                assert name
                # Handle #include \<newline> "header-on-second-line.h".
                if name.startswith('\\'):
                    name = name[1:].strip()
                assert name[0] in '<"', token
                assert name[-1] in '>"', token
                system = name[0] == '<'
                filename = name[1:-1]
                return Include(token.start, token.end, filename, system)
            if name.startswith('define'):
                # Remove "define".
                name = name[6:].strip()
                assert name
                value = ''
                # Split "NAME value" at the first whitespace run.
                for i, c in enumerate(name):
                    if c.isspace():
                        value = name[i:].lstrip()
                        name = name[:i]
                        break
                return Define(token.start, token.end, name, value)
            if name.startswith('if') and name[2:3].isspace():
                condition = name[3:].strip()
                if condition.startswith('0') or condition.startswith('(0)'):
                    # Dead code: consume everything up to the matching #endif.
                    self._SkipIf0Blocks()
        return None
847 | |
848 def _GetTokensUpTo(self, expected_token_type, expected_token): | |
849 return self._GetVarTokensUpTo(expected_token_type, expected_token)[0] | |
850 | |
851 def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens): | |
852 last_token = self._GetNextToken() | |
853 tokens = [] | |
854 while (last_token.token_type != expected_token_type or | |
855 last_token.name not in expected_tokens): | |
856 tokens.append(last_token) | |
857 last_token = self._GetNextToken() | |
858 return tokens, last_token | |
859 | |
860 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary. | |
861 def _IgnoreUpTo(self, token_type, token): | |
862 unused_tokens = self._GetTokensUpTo(token_type, token) | |
863 | |
    def _SkipIf0Blocks(self):
        """Consume tokens through the #endif matching an '#if 0' directive.

        Nested #if/#endif pairs inside the dead block are tracked so only
        the matching #endif terminates the skip.
        """
        count = 1
        while 1:
            token = self._GetNextToken()
            # Non-preprocessor tokens cannot change the nesting level.
            if token.token_type != tokenize.PREPROCESSOR:
                continue

            name = token.name[1:].lstrip()
            if name.startswith('endif'):
                count -= 1
                if count == 0:
                    break
            elif name.startswith('if'):
                count += 1
878 | |
    def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
        """Yield tokens up to and including the balanced close_paren.

        Nested open/close pairs are tracked.  GetNextToken may be supplied
        to read from a source other than the live stream.
        """
        if GetNextToken is None:
            GetNextToken = self._GetNextToken
        # Assumes the current token is open_paren and we will consume
        # and return up to the close_paren.
        count = 1
        token = GetNextToken()
        while 1:
            if token.token_type == tokenize.SYNTAX:
                if token.name == open_paren:
                    count += 1
                elif token.name == close_paren:
                    count -= 1
                    if count == 0:
                        break
            yield token
            token = GetNextToken()
        # Also yield the final (closing) token.
        yield token
897 | |
    def _GetParameters(self):
        """Yield the tokens of a parenthesized parameter list."""
        return self._GetMatchingChar('(', ')')
900 | |
    def GetScope(self):
        """Yield the tokens of a brace-delimited scope."""
        return self._GetMatchingChar('{', '}')
903 | |
904 def _GetNextToken(self): | |
905 if self.token_queue: | |
906 return self.token_queue.pop() | |
907 return next(self.tokens) | |
908 | |
    def _AddBackToken(self, token):
        """Push one token back so _GetNextToken returns it next."""
        if token.whence == tokenize.WHENCE_STREAM:
            # First push-back of a stream token: mark it as queued and put
            # it at the far end (the queue pops from the tail).
            token.whence = tokenize.WHENCE_QUEUE
            self.token_queue.insert(0, token)
        else:
            assert token.whence == tokenize.WHENCE_QUEUE, token
            self.token_queue.append(token)
916 | |
    def _AddBackTokens(self, tokens):
        """Push a sequence of tokens back, preserving their original order."""
        if tokens:
            # All tokens in the sequence share the same whence; inspect the
            # last one to decide which end of the queue to use.
            if tokens[-1].whence == tokenize.WHENCE_STREAM:
                for token in tokens:
                    token.whence = tokenize.WHENCE_QUEUE
                self.token_queue[:0] = reversed(tokens)
            else:
                assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
                self.token_queue.extend(reversed(tokens))
926 | |
    def GetName(self, seq=None):
        """Returns ([tokens], next_token_info)."""
        GetNextToken = self._GetNextToken
        if seq is not None:
            # Read from the supplied token sequence instead of the stream.
            it = iter(seq)
            GetNextToken = lambda: next(it)
        next_token = GetNextToken()
        tokens = []
        last_token_was_name = False
        while (next_token.token_type == tokenize.NAME or
               (next_token.token_type == tokenize.SYNTAX and
                next_token.name in ('::', '<'))):
            # Two NAMEs in a row means the identifier should terminate.
            # It's probably some sort of variable declaration.
            if last_token_was_name and next_token.token_type == tokenize.NAME:
                break
            last_token_was_name = next_token.token_type == tokenize.NAME
            tokens.append(next_token)
            # Handle templated names.
            if next_token.name == '<':
                tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
                last_token_was_name = True
            next_token = GetNextToken()
        return tokens, next_token
951 | |
952 def GetMethod(self, modifiers, templated_types): | |
953 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') | |
954 assert len(return_type_and_name) >= 1 | |
955 return self._GetMethod(return_type_and_name, modifiers, templated_types, | |
956 False) | |
957 | |
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parse a function/method declaration or definition.

        return_type_and_name holds the tokens read so far (return type
        followed by the name); modifiers is a FUNCTION_* bitmask.  When
        get_paren is true, tokens up to and including the opening '('
        are consumed here first.  Returns a Function, a Method (when the
        name is '::'-qualified), or a variable node for declarations
        that only look like methods (function pointers).
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # operator[] -- rebuild the name from the trailing
            # 'operator', '[', ']' tokens.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # indices is only used for its start/end position info.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]  # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]  # Remove trailing ')'.

        # Consume trailing modifiers (const, throw(...), __attribute__,
        # unexpanded macros) until we hit syntax again.
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz): assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                # NOTE: modifiers is rebound here from a bitmask to a
                # list of names for the variable node.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]  # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            body = list(self.GetScope())
            del body[-1]  # Remove trailing '}'.
        else:
            # Declaration only -- no body.
            body = None
            if token.name == '=':
                # '= 0' marks a pure virtual method.
                token = self._GetNextToken()
                assert token.token_type == tokenize.CONSTANT, token
                assert token.name == '0', token
                modifiers |= FUNCTION_PURE_VIRTUAL
                token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
1105 | |
1106 def _GetReturnTypeAndClassName(self, token_seq): | |
1107 # Splitting the return type from the class name in a method | |
1108 # can be tricky. For example, Return::Type::Is::Hard::To::Find(). | |
1109 # Where is the return type and where is the class name? | |
1110 # The heuristic used is to pull the last name as the class name. | |
1111 # This includes all the templated type info. | |
1112 # TODO(nnorwitz): if there is only One name like in the | |
1113 # example above, punt and assume the last bit is the class name. | |
1114 | |
1115 # Ignore a :: prefix, if exists so we can find the first real name. | |
1116 i = 0 | |
1117 if token_seq[0].name == '::': | |
1118 i = 1 | |
1119 # Ignore a :: suffix, if exists. | |
1120 end = len(token_seq) - 1 | |
1121 if token_seq[end-1].name == '::': | |
1122 end -= 1 | |
1123 | |
1124 # Make a copy of the sequence so we can append a sentinel | |
1125 # value. This is required for GetName will has to have some | |
1126 # terminating condition beyond the last name. | |
1127 seq_copy = token_seq[i:end] | |
1128 seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0)) | |
1129 names = [] | |
1130 while i < end: | |
1131 # Iterate through the sequence parsing out each name. | |
1132 new_name, next = self.GetName(seq_copy[i:]) | |
1133 assert new_name, 'Got empty new_name, next=%s' % next | |
1134 # We got a pointer or ref. Add it to the name. | |
1135 if next and next.token_type == tokenize.SYNTAX: | |
1136 new_name.append(next) | |
1137 names.append(new_name) | |
1138 i += len(new_name) | |
1139 | |
1140 # Now that we have the names, it's time to undo what we did. | |
1141 | |
1142 # Remove the sentinel value. | |
1143 names[-1].pop() | |
1144 # Flatten the token sequence for the return type. | |
1145 return_type = [e for seq in names[:-1] for e in seq] | |
1146 # The class name is the last name. | |
1147 class_name = names[-1] | |
1148 return return_type, class_name | |
1149 | |
    # The handlers below fire on primitive-type keywords.  They do
    # nothing on purpose: the type tokens are consumed as part of the
    # enclosing declaration.

    def handle_bool(self):
        pass

    def handle_char(self):
        pass

    def handle_int(self):
        pass

    def handle_long(self):
        pass

    def handle_short(self):
        pass

    def handle_double(self):
        pass

    def handle_float(self):
        pass

    def handle_void(self):
        pass

    def handle_wchar_t(self):
        pass

    def handle_unsigned(self):
        pass

    def handle_signed(self):
        pass
1182 | |
1183 def _GetNestedType(self, ctor): | |
1184 name = None | |
1185 name_tokens, token = self.GetName() | |
1186 if name_tokens: | |
1187 name = ''.join([t.name for t in name_tokens]) | |
1188 | |
1189 # Handle forward declarations. | |
1190 if token.token_type == tokenize.SYNTAX and token.name == ';': | |
1191 return ctor(token.start, token.end, name, None, | |
1192 self.namespace_stack) | |
1193 | |
1194 if token.token_type == tokenize.NAME and self._handling_typedef: | |
1195 self._AddBackToken(token) | |
1196 return ctor(token.start, token.end, name, None, | |
1197 self.namespace_stack) | |
1198 | |
1199 # Must be the type declaration. | |
1200 fields = list(self._GetMatchingChar('{', '}')) | |
1201 del fields[-1] # Remove trailing '}'. | |
1202 if token.token_type == tokenize.SYNTAX and token.name == '{': | |
1203 next = self._GetNextToken() | |
1204 new_type = ctor(token.start, token.end, name, fields, | |
1205 self.namespace_stack) | |
1206 # A name means this is an anonymous type and the name | |
1207 # is the variable declaration. | |
1208 if next.token_type != tokenize.NAME: | |
1209 return new_type | |
1210 name = new_type | |
1211 token = next | |
1212 | |
1213 # Must be variable declaration using the type prefixed with keyword. | |
1214 assert token.token_type == tokenize.NAME, token | |
1215 return self._CreateVariable(token, token.name, name, [], '', None) | |
1216 | |
    def handle_struct(self):
        """Parse a 'struct' keyword.

        Special-cases struct-typed variables, struct-returning method
        declarations, and typedef/aliasing before falling back to
        _GetClass for a real struct definition.
        """
        # Special case the handling typedef/aliasing of structs here.
        # It would be a pain to handle in the class code.
        name_tokens, var_token = self.GetName()
        if name_tokens:
            next_token = self._GetNextToken()
            is_syntax = (var_token.token_type == tokenize.SYNTAX and
                         var_token.name[0] in '*&')
            is_variable = (var_token.token_type == tokenize.NAME and
                           next_token.name == ';')
            variable = var_token
            if is_syntax and not is_variable:
                variable = next_token
                temp = self._GetNextToken()
                if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                    # Handle methods declared to return a struct.
                    t0 = name_tokens[0]
                    # Synthesize a 'struct' token positioned just before
                    # the name tokens.
                    struct = tokenize.Token(tokenize.NAME, 'struct',
                                            t0.start-7, t0.start-2)
                    type_and_name = [struct]
                    type_and_name.extend(name_tokens)
                    type_and_name.extend((var_token, next_token))
                    return self._GetMethod(type_and_name, 0, None, False)
                assert temp.name == ';', (temp, name_tokens, var_token)
            if is_syntax or (is_variable and not self._handling_typedef):
                modifiers = ['struct']
                type_name = ''.join([t.name for t in name_tokens])
                position = name_tokens[0]
                return self._CreateVariable(position, variable.name, type_name,
                                            modifiers, var_token.name, None)
            # Not a variable/method; push everything back and parse a
            # full struct definition below.
            name_tokens.extend((var_token, next_token))
            self._AddBackTokens(name_tokens)
        else:
            self._AddBackToken(var_token)
        return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
1252 | |
    def handle_union(self):
        """Parse a union as a nested type."""
        return self._GetNestedType(Union)

    def handle_enum(self):
        """Parse an enum as a nested type."""
        return self._GetNestedType(Enum)
1258 | |
    # Storage-class and qualifier keywords need no AST node of their
    # own; the surrounding declaration parsing consumes them.

    def handle_auto(self):
        # TODO(nnorwitz): warn about using auto?  Probably not since it
        # will be reclaimed and useful for C++0x.
        pass

    def handle_register(self):
        pass

    def handle_const(self):
        pass

    def handle_inline(self):
        pass

    def handle_extern(self):
        pass

    def handle_static(self):
        pass
1278 | |
1279 def handle_virtual(self): | |
1280 # What follows must be a method. | |
1281 token = token2 = self._GetNextToken() | |
1282 if token.name == 'inline': | |
1283 # HACK(nnorwitz): handle inline dtors by ignoring 'inline'. | |
1284 token2 = self._GetNextToken() | |
1285 if token2.token_type == tokenize.SYNTAX and token2.name == '~': | |
1286 return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None) | |
1287 assert token.token_type == tokenize.NAME or token.name == '::', token | |
1288 return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(') | |
1289 return_type_and_name.insert(0, token) | |
1290 if token2 is not token: | |
1291 return_type_and_name.insert(1, token2) | |
1292 return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL, | |
1293 None, False) | |
1294 | |
    # Qualifier keywords consumed as part of the enclosing declaration.

    def handle_volatile(self):
        pass

    def handle_mutable(self):
        pass
1300 | |
    # Access-specifier labels update the builder's current visibility,
    # which applies to subsequent members.  Only valid inside a class.

    def handle_public(self):
        assert self.in_class
        self.visibility = VISIBILITY_PUBLIC

    def handle_protected(self):
        assert self.in_class
        self.visibility = VISIBILITY_PROTECTED

    def handle_private(self):
        assert self.in_class
        self.visibility = VISIBILITY_PRIVATE
1312 | |
1313 def handle_friend(self): | |
1314 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | |
1315 assert tokens | |
1316 t0 = tokens[0] | |
1317 return Friend(t0.start, t0.end, tokens, self.namespace_stack) | |
1318 | |
    # Cast and allocation keywords appear inside expressions, which this
    # parser does not model; nothing to do.

    def handle_static_cast(self):
        pass

    def handle_const_cast(self):
        pass

    def handle_dynamic_cast(self):
        pass

    def handle_reinterpret_cast(self):
        pass

    def handle_new(self):
        pass
1333 | |
1334 def handle_delete(self): | |
1335 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | |
1336 assert tokens | |
1337 return Delete(tokens[0].start, tokens[0].end, tokens) | |
1338 | |
    def handle_typedef(self):
        """Parse a typedef; returns a Typedef node.

        struct/enum/union/class typedefs are delegated to their keyword
        handlers with _handling_typedef set; the remainder of the
        statement is then consumed up to ';'.  Contains HACKs for
        function-pointer and array typedefs.
        """
        token = self._GetNextToken()
        if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
            # Token must be struct/enum/union/class.
            method = getattr(self, 'handle_' + token.name)
            self._handling_typedef = True
            tokens = [method()]
            self._handling_typedef = False
        else:
            tokens = [token]

        # Get the remainder of the typedef up to the semi-colon.
        tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

        # TODO(nnorwitz): clean all this up.
        assert tokens
        name = tokens.pop()
        # indices is only used for position (start/end) info.
        indices = name
        if tokens:
            indices = tokens[0]
        if not indices:
            indices = token
        if name.name == ')':
            # HACK(nnorwitz): Handle pointers to functions "properly".
            if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
                tokens.append(name)
                name = tokens[3]
        elif name.name == ']':
            # HACK(nnorwitz): Handle arrays properly.
            if len(tokens) >= 2:
                tokens.append(name)
                name = tokens[1]
        new_type = tokens
        if tokens and isinstance(tokens[0], tokenize.Token):
            new_type = self.converter.ToType(tokens)[0]
        return Typedef(indices.start, indices.end, name.name,
                       new_type, self.namespace_stack)
1378 | |
    def handle_typeid(self):
        pass  # Not needed yet.

    def handle_typename(self):
        pass  # Not needed yet.
1384 | |
    def _GetTemplatedTypes(self):
        """Parse '<...>' template parameters into {name: (type, default)}.

        Assumes the opening '<' has been consumed.  type and default are
        None when not specified.
        """
        result = {}
        tokens = list(self._GetMatchingChar('<', '>'))
        len_tokens = len(tokens) - 1  # Ignore trailing '>'.
        i = 0
        while i < len_tokens:
            key = tokens[i].name
            i += 1
            # Skip 'typename'/'class' keywords and separators.
            if keywords.IsKeyword(key) or key == ',':
                continue
            type_name = default = None
            if i < len_tokens:
                i += 1
                if tokens[i-1].name == '=':
                    # Parameter with a default value.
                    assert i < len_tokens, '%s %s' % (i, tokens)
                    default, unused_next_token = self.GetName(tokens[i:])
                    i += len(default)
                else:
                    if tokens[i-1].name != ',':
                        # We got something like: Type variable.
                        # Re-adjust the key (variable) and type_name (Type).
                        key = tokens[i-1].name
                        type_name = tokens[i-2]

            result[key] = (type_name, default)
        return result
1411 | |
    def handle_template(self):
        """Parse a template declaration.

        Dispatches to class/struct/friend handling, or -- after pushing
        the tokens back -- to method parsing; returns None for a
        templated variable definition.
        """
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX, token
        assert token.name == '<', token
        templated_types = self._GetTemplatedTypes()
        # TODO(nnorwitz): for now, just ignore the template params.
        token = self._GetNextToken()
        if token.token_type == tokenize.NAME:
            if token.name == 'class':
                return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
            elif token.name == 'struct':
                return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
            elif token.name == 'friend':
                return self.handle_friend()
        self._AddBackToken(token)
        # Peek ahead to find whether a '(' (method) or ';' (variable)
        # comes first, then push everything back for re-parsing.
        tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
        tokens.append(last)
        self._AddBackTokens(tokens)
        if last.name == '(':
            return self.GetMethod(FUNCTION_NONE, templated_types)
        # Must be a variable definition.
        return None
1434 | |
    # Literal and inline-assembly keywords produce no declarations.

    def handle_true(self):
        pass  # Nothing to do.

    def handle_false(self):
        pass  # Nothing to do.

    def handle_asm(self):
        pass  # Not needed yet.
1443 | |
    def handle_class(self):
        """Parse a class; default member visibility is private."""
        return self._GetClass(Class, VISIBILITY_PRIVATE, None)
1446 | |
    def _GetBases(self):
        """Parse a base-class list after ':'.

        Returns (bases, token) where bases is a list of type nodes and
        token is the '{' that opens the class body.
        """
        # Get base classes.
        bases = []
        while 1:
            token = self._GetNextToken()
            assert token.token_type == tokenize.NAME, token
            # TODO(nnorwitz): store kind of inheritance...maybe.
            if token.name not in ('public', 'protected', 'private'):
                # If inheritance type is not specified, it is private.
                # Just put the token back so we can form a name.
                # TODO(nnorwitz): it would be good to warn about this.
                self._AddBackToken(token)
            else:
                # Check for virtual inheritance.
                token = self._GetNextToken()
                if token.name != 'virtual':
                    self._AddBackToken(token)
                else:
                    # TODO(nnorwitz): store that we got virtual for this base.
                    pass
            base, next_token = self.GetName()
            bases_ast = self.converter.ToType(base)
            assert len(bases_ast) == 1, bases_ast
            bases.append(bases_ast[0])
            assert next_token.token_type == tokenize.SYNTAX, next_token
            if next_token.name == '{':
                token = next_token
                break
            # Support multiple inheritance.
            assert next_token.name == ',', next_token
        return bases, token
1478 | |
    def _GetClass(self, class_type, visibility, templated_types):
        """Parse a class/struct once its keyword has been consumed.

        class_type is the node constructor (Class or Struct); visibility
        is the default member access level.  Handles forward
        declarations, inline declarations that are really data or
        methods, base classes, the class body (parsed with a nested
        AstBuilder), and a trailing variable declaration.
        """
        class_name = None
        class_token = self._GetNextToken()
        if class_token.token_type != tokenize.NAME:
            # Anonymous class/struct.
            assert class_token.token_type == tokenize.SYNTAX, class_token
            token = class_token
        else:
            self._AddBackToken(class_token)
            name_tokens, token = self.GetName()
            class_name = ''.join([t.name for t in name_tokens])
        bases = None
        if token.token_type == tokenize.SYNTAX:
            if token.name == ';':
                # Forward declaration.
                return class_type(class_token.start, class_token.end,
                                  class_name, None, templated_types, None,
                                  self.namespace_stack)
            if token.name in '*&':
                # Inline forward declaration.  Could be method or data.
                name_token = self._GetNextToken()
                next_token = self._GetNextToken()
                if next_token.name == ';':
                    # Handle data
                    modifiers = ['class']
                    return self._CreateVariable(class_token, name_token.name,
                                                class_name,
                                                modifiers, token.name, None)
                else:
                    # Assume this is a method.
                    tokens = (class_token, token, name_token, next_token)
                    self._AddBackTokens(tokens)
                    return self.GetMethod(FUNCTION_NONE, None)
            if token.name == ':':
                bases, token = self._GetBases()

        body = None
        if token.token_type == tokenize.SYNTAX and token.name == '{':
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '{', token

            # Parse the class body with a nested builder so member
            # handlers see the right class name and visibility.
            ast = AstBuilder(self.GetScope(), self.filename, class_name,
                             visibility, self.namespace_stack)
            body = list(ast.Generate())

            if not self._handling_typedef:
                token = self._GetNextToken()
                if token.token_type != tokenize.NAME:
                    assert token.token_type == tokenize.SYNTAX, token
                    assert token.name == ';', token
                else:
                    # 'class Foo {...} var;' -- the trailing name is a
                    # variable of the just-defined class type.
                    new_class = class_type(class_token.start, class_token.end,
                                           class_name, bases, None,
                                           body, self.namespace_stack)

                    modifiers = []
                    return self._CreateVariable(class_token,
                                                token.name, new_class,
                                                modifiers, token.name, None)
        else:
            if not self._handling_typedef:
                self.HandleError('non-typedef token', token)
            self._AddBackToken(token)

        return class_type(class_token.start, class_token.end, class_name,
                          bases, None, body, self.namespace_stack)
1544 | |
    def handle_namespace(self):
        """Parse a namespace (named, anonymous, or alias); returns None.

        Pushes the name onto namespace_stack and plants an internal
        token in the stream so the stack is popped once the namespace
        body has been consumed.
        """
        token = self._GetNextToken()
        # Support anonymous namespaces.
        name = None
        if token.token_type == tokenize.NAME:
            name = token.name
            token = self._GetNextToken()
        self.namespace_stack.append(name)
        assert token.token_type == tokenize.SYNTAX, token
        # Create an internal token that denotes when the namespace is complete.
        internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                        None, None)
        internal_token.whence = token.whence
        if token.name == '=':
            # TODO(nnorwitz): handle aliasing namespaces.
            name, next_token = self.GetName()
            assert next_token.name == ';', next_token
            self._AddBackToken(internal_token)
        else:
            assert token.name == '{', token
            tokens = list(self.GetScope())
            # Replace the trailing } with the internal namespace pop token.
            tokens[-1] = internal_token
            # Handle namespace with nothing in it.
            self._AddBackTokens(tokens)
        return None
1571 | |
1572 def handle_using(self): | |
1573 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | |
1574 assert tokens | |
1575 return Using(tokens[0].start, tokens[0].end, tokens) | |
1576 | |
    def handle_explicit(self):
        """'explicit' always introduces a constructor inside a class."""
        assert self.in_class
        # Nothing much to do.
        # TODO(nnorwitz): maybe verify the method name == class name.
        # This must be a ctor.
        return self.GetMethod(FUNCTION_CTOR, None)
1583 | |
    # Expression-level keywords: no declaration to produce.

    def handle_this(self):
        pass  # Nothing to do.

    def handle_operator(self):
        # Pull off the next token(s?) and make that part of the method name.
        pass

    def handle_sizeof(self):
        pass
1593 | |
    # Statement-level keywords inside bodies are ignored (bodies are not
    # modeled -- see the TODO at the top of the file).

    def handle_case(self):
        pass

    def handle_switch(self):
        pass
1599 | |
    def handle_default(self):
        """Consume the ':' that must follow 'default' in a switch."""
        token = self._GetNextToken()
        assert token.token_type == tokenize.SYNTAX
        assert token.name == ':'
1604 | |
    # Conditionals are not modeled; nothing to do.

    def handle_if(self):
        pass

    def handle_else(self):
        pass
1610 | |
1611 def handle_return(self): | |
1612 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | |
1613 if not tokens: | |
1614 return Return(self.current_token.start, self.current_token.end, None
) | |
1615 return Return(tokens[0].start, tokens[0].end, tokens) | |
1616 | |
1617 def handle_goto(self): | |
1618 tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';') | |
1619 assert len(tokens) == 1, str(tokens) | |
1620 return Goto(tokens[0].start, tokens[0].end, tokens[0].name) | |
1621 | |
    # Exception handling is not modeled (see the TODO at the file top).

    def handle_try(self):
        pass  # Not needed yet.

    def handle_catch(self):
        pass  # Not needed yet.

    def handle_throw(self):
        pass  # Not needed yet.
1630 | |
    # Loops are not modeled; nothing to do.

    def handle_while(self):
        pass

    def handle_do(self):
        pass

    def handle_for(self):
        pass
1639 | |
    # break/continue carry no declaration; just consume to the ';'.

    def handle_break(self):
        self._IgnoreUpTo(tokenize.SYNTAX, ';')

    def handle_continue(self):
        self._IgnoreUpTo(tokenize.SYNTAX, ';')
1645 | |
1646 | |
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1' -- used for error reporting / node positions
                (presumably; confirm against AstBuilder.__init__)

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)
1658 | |
1659 | |
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Best-effort: parse failures in one file should not abort the
        # caller.  Catch Exception (not a bare except) so SystemExit
        # and GeneratorExit still propagate.
        pass
1682 | |
1683 | |
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for filename in filenames:
        PrintIndentifiers(filename, should_print)
1693 | |
1694 | |
def main(argv):
    """Parse each file named in argv[1:]; dump the AST when DEBUG is set."""
    for filename in argv[1:]:
        source = utils.ReadFile(filename)
        if source is None:
            continue

        print('Processing %s' % filename)
        builder = BuilderFromSource(source, filename)
        try:
            entire_ast = filter(None, builder.Generate())
        except KeyboardInterrupt:
            return
        except Exception:
            # Catch Exception rather than a bare except so SystemExit
            # and GeneratorExit still propagate.
            # Already printed a warning, print the traceback and continue.
            traceback.print_exc()
        else:
            if utils.DEBUG:
                for ast in entire_ast:
                    print(ast)
1714 | |
1715 | |
# Script entry point: parse each file named on the command line.
if __name__ == '__main__':
    main(sys.argv)
OLD | NEW |