OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # | |
3 # Copyright 2010 The Closure Linter Authors. All Rights Reserved. | |
4 # | |
5 # Licensed under the Apache License, Version 2.0 (the "License"); | |
6 # you may not use this file except in compliance with the License. | |
7 # You may obtain a copy of the License at | |
8 # | |
9 # http://www.apache.org/licenses/LICENSE-2.0 | |
10 # | |
11 # Unless required by applicable law or agreed to in writing, software | |
12 # distributed under the License is distributed on an "AS-IS" BASIS, | |
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 # See the License for the specific language governing permissions and | |
15 # limitations under the License. | |
16 | |
17 """Metadata pass for annotating tokens in EcmaScript files.""" | |
18 | |
19 __author__ = ('robbyw@google.com (Robert Walker)') | |
20 | |
21 from closure_linter import javascripttokens | |
22 from closure_linter import tokenutil | |
23 | |
24 | |
25 TokenType = javascripttokens.JavaScriptTokenType | |
26 | |
27 | |
class ParseError(Exception):
  """Raised when the token stream cannot be parsed at a particular token.

  Attributes:
    token: The token at which parsing failed.
  """

  def __init__(self, token, message=None):
    """Creates a parse error for the given token.

    Args:
      token: The token at which parsing failed.
      message: Optional human readable description of the failure. It is
          forwarded unchanged to the Exception base class.
    """
    super(ParseError, self).__init__(message)
    self.token = token
44 | |
45 | |
class EcmaContext(object):
  """Context object for EcmaScript languages.

  Contexts form a tree: every context except the root has a parent, and each
  parent keeps its children ordered by the position of their start tokens.

  Attributes:
    type: The context type, one of the string constants defined below.
    start_token: The token where this context starts.
    end_token: The token where this context ends, or None while still open.
    parent: The parent context, or None if there is none.
    children: The child contexts of this context, in order.
  """

  # The root context.
  ROOT = 'root'

  # A block of code.
  BLOCK = 'block'

  # A pseudo-block of code for a given case or default section.
  CASE_BLOCK = 'case_block'

  # Block of statements in a for loop's parentheses.
  FOR_GROUP_BLOCK = 'for_block'

  # An implied block of code for 1 line if, while, and for statements.
  IMPLIED_BLOCK = 'implied_block'

  # An index in to an array or object.
  INDEX = 'index'

  # An array literal in [].
  ARRAY_LITERAL = 'array_literal'

  # An object literal in {}.
  OBJECT_LITERAL = 'object_literal'

  # An individual element in an array or object literal.
  LITERAL_ELEMENT = 'literal_element'

  # The portion of a ternary statement between ? and :
  TERNARY_TRUE = 'ternary_true'

  # The portion of a ternary statement after :
  TERNARY_FALSE = 'ternary_false'

  # The entire switch statement. This will contain a GROUP with the variable
  # and a BLOCK with the code. Since that BLOCK is not a normal block, it can
  # not contain statements except for case and default.
  SWITCH = 'switch'

  # A normal comment.
  COMMENT = 'comment'

  # A JsDoc comment.
  DOC = 'doc'

  # An individual statement.
  STATEMENT = 'statement'

  # Code within parentheses.
  GROUP = 'group'

  # Parameter names in a function declaration.
  PARAMETERS = 'parameters'

  # A set of variable declarations appearing after the 'var' keyword.
  VAR = 'var'

  # Context types that are blocks.
  BLOCK_TYPES = frozenset([
      ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])

  def __init__(self, context_type, start_token, parent=None):
    """Initializes the context object.

    Args:
      context_type: The context type.
      start_token: The token where this context starts.
      parent: The parent context. When given, the new context registers
          itself as a child of it.
    """
    self.type = context_type
    self.start_token = start_token
    self.end_token = None

    # AddChild() below is responsible for setting the parent link.
    self.parent = None
    self.children = []

    if parent:
      parent.AddChild(self)

  def __repr__(self):
    """Returns a string listing the context types from this one to the top."""
    stack = []
    context = self
    while context:
      stack.append(context.type)
      context = context.parent
    return 'Context(%s)' % ' > '.join(stack)

  def AddChild(self, child):
    """Adds a child to this context and sets child's parent to this context.

    The children list is re-sorted by start token position after every
    insertion.

    Args:
      child: A child EcmaContext. The child's parent will be set to this
          context.
    """
    # Imported locally so this class does not depend on a file-level import.
    import functools

    child.parent = self

    self.children.append(child)
    # list.sort() only accepts a positional cmp function on Python 2; wrapping
    # the comparator with cmp_to_key works on Python 2.7 and Python 3 alike.
    self.children.sort(
        key=functools.cmp_to_key(EcmaContext._CompareContexts))

  def GetRoot(self):
    """Get the root context that contains this context, if any.

    Returns:
      The nearest context of type ROOT found by walking up the parent chain
      (possibly this context itself), or None if there is no such context.
    """
    context = self
    while context:
      # Compare by value: identity of equal strings is not guaranteed.
      if context.type == EcmaContext.ROOT:
        return context
      context = context.parent
    return None

  @staticmethod
  def _CompareContexts(context1, context2):
    """Compares contexts 1 and 2 by the position of their start tokens.

    Args:
      context1: The first EcmaContext.
      context2: The second EcmaContext.

    Returns:
      The result of tokenutil.Compare on the two start tokens.
    """
    return tokenutil.Compare(context1.start_token, context2.start_token)
177 | |
178 | |
class EcmaMetaData(object):
  """Token metadata for EcmaScript languages.

  Attributes:
    last_code: The last code token to appear before this one.
    context: The context this token appears in.
    operator_type: The operator type, will be one of the *_OPERATOR constants
        defined below.
    is_implied_semicolon: Flag, False by default, reported as 'implied;' by
        __repr__ when set.
    is_implied_block: Flag, False by default.
    is_implied_block_close: Flag, False by default.
    aliased_symbol: The full symbol being identified, as a string (e.g. an
        'XhrIo' alias for 'goog.net.XhrIo'). Only applicable to identifier
        tokens. This is set in aliaspass.py and is a best guess.
    is_alias_definition: True if the symbol is part of an alias definition.
        If so, these symbols won't be counted towards goog.requires/provides.
  """

  UNARY_OPERATOR = 'unary'

  UNARY_POST_OPERATOR = 'unary_post'

  BINARY_OPERATOR = 'binary'

  TERNARY_OPERATOR = 'ternary'

  def __init__(self):
    """Initializes a token metadata object with every field unset."""
    # Boolean flags all start out cleared.
    self.is_implied_semicolon = False
    self.is_implied_block = False
    self.is_implied_block_close = False
    self.is_alias_definition = False

    # Everything else is unknown until a pass fills it in.
    self.last_code = None
    self.context = None
    self.operator_type = None
    self.aliased_symbol = None

  def __repr__(self):
    """Returns a string representation of the metadata object."""
    description = ['%r' % self.context]
    if self.operator_type:
      description += ['optype: %r' % self.operator_type]
    if self.is_implied_semicolon:
      description += ['implied;']
    if self.aliased_symbol:
      description += ['alias for: %s' % self.aliased_symbol]
    return 'MetaData(%s)' % ', '.join(description)

  def IsUnaryOperator(self):
    """Returns whether the operator type is unary (prefix or postfix)."""
    optype = self.operator_type
    return (optype == EcmaMetaData.UNARY_OPERATOR or
            optype == EcmaMetaData.UNARY_POST_OPERATOR)

  def IsUnaryPostOperator(self):
    """Returns whether the operator type is the unary postfix type."""
    return EcmaMetaData.UNARY_POST_OPERATOR == self.operator_type
230 | |
231 | |
class EcmaMetaDataPass(object):
  """A pass that iterates over all tokens and builds metadata about them.

  The pass walks the token stream once, maintaining a stack of EcmaContext
  objects (linked through their parent pointers) and attaching a metadata
  object to every token it visits.
  """

  def __init__(self):
    """Initialize the meta data pass object."""
    self.Reset()

  def Reset(self):
    """Resets the metadata pass to prepare for the next file."""
    # _token and _context must be cleared before the root context is created,
    # because _CreateContext reads both of them.
    self._token = None
    self._context = None
    self._AddContext(EcmaContext.ROOT)
    self._last_code = None

  def _CreateContext(self, context_type):
    """Overridable by subclasses to create the appropriate context type."""
    return EcmaContext(context_type, self._token, self._context)

  def _CreateMetaData(self):
    """Overridable by subclasses to create the appropriate metadata type."""
    return EcmaMetaData()

  def _AddContext(self, context_type):
    """Adds a context of the given type to the context stack.

    The new context starts at the current token and becomes the current
    context, with the previous current context as its parent.

    Args:
      context_type: The type of context to create
    """
    self._context = self._CreateContext(context_type)

  def _PopContext(self):
    """Moves up one level in the context stack.

    The popped context's end token is set to the current token.

    Returns:
      The former context.

    Raises:
      ParseError: If the root context is popped.
    """
    top_context = self._context
    top_context.end_token = self._token
    self._context = top_context.parent
    if self._context:
      return top_context
    else:
      raise ParseError(self._token)

  def _PopContextType(self, *stop_types):
    """Pops the context stack until a context of the given type is popped.

    Args:
      *stop_types: The types of context to pop to - stops at the first match.

    Returns:
      The context object of the given type that was popped.

    Raises:
      ParseError: If the root context is popped before a match is found.
    """
    last = None
    while not last or last.type not in stop_types:
      last = self._PopContext()
    return last

  def _EndStatement(self):
    """Process the end of a statement."""
    self._PopContextType(EcmaContext.STATEMENT)
    if self._context.type == EcmaContext.IMPLIED_BLOCK:
      # The statement was the sole body of an implied block, so the block
      # closes together with the statement.
      self._token.metadata.is_implied_block_close = True
      self._PopContext()

  def _ProcessContext(self):
    """Process the context at the current token.

    Returns:
      The context that should be assigned to the current token, or None if
      the current context after this method should be used.

    Raises:
      ParseError: When the token appears in an invalid context.
    """
    token = self._token
    token_type = token.type

    if self._context.type in EcmaContext.BLOCK_TYPES:
      # Whenever we're in a block, we add a statement context.  We make an
      # exception for switch statements since they can only contain case: and
      # default: and therefore don't directly contain statements.
      # The block we add here may be immediately removed in some cases, but
      # that causes no harm.
      parent = self._context.parent
      if not parent or parent.type != EcmaContext.SWITCH:
        self._AddContext(EcmaContext.STATEMENT)

    elif self._context.type == EcmaContext.ARRAY_LITERAL:
      self._AddContext(EcmaContext.LITERAL_ELEMENT)

    if token_type == TokenType.START_PAREN:
      if self._last_code and self._last_code.IsKeyword('for'):
        # for loops contain multiple statements in the group unlike while,
        # switch, if, etc.
        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
      else:
        self._AddContext(EcmaContext.GROUP)

    elif token_type == TokenType.END_PAREN:
      result = self._PopContextType(EcmaContext.GROUP,
                                    EcmaContext.FOR_GROUP_BLOCK)
      keyword_token = result.start_token.metadata.last_code
      # keyword_token will not exist if the open paren is the first line of the
      # file, for example if all code is wrapped in an immediately executed
      # anonymous function.
      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
        if next_code.type != TokenType.START_BLOCK:
          # Check for do-while.
          is_do_while = False
          pre_keyword_token = keyword_token.metadata.last_code
          if (pre_keyword_token and
              pre_keyword_token.type == TokenType.END_BLOCK):
            start_block_token = pre_keyword_token.metadata.context.start_token
            # The context may start at the very first code token of the file,
            # in which case there is no preceding code token to inspect.
            before_block = start_block_token.metadata.last_code
            is_do_while = (before_block is not None and
                           before_block.string == 'do')

          # If it's not do-while, it's an implied block.
          if not is_do_while:
            self._AddContext(EcmaContext.IMPLIED_BLOCK)
            token.metadata.is_implied_block = True

      return result

    # else (not else if) with no open brace after it should be considered the
    # start of an implied block, similar to the case with if, for, and while
    # above.
    elif (token_type == TokenType.KEYWORD and
          token.string == 'else'):
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      if (next_code.type != TokenType.START_BLOCK and
          (next_code.type != TokenType.KEYWORD or next_code.string != 'if')):
        self._AddContext(EcmaContext.IMPLIED_BLOCK)
        token.metadata.is_implied_block = True

    elif token_type == TokenType.START_PARAMETERS:
      self._AddContext(EcmaContext.PARAMETERS)

    elif token_type == TokenType.END_PARAMETERS:
      return self._PopContextType(EcmaContext.PARAMETERS)

    elif token_type == TokenType.START_BRACKET:
      if (self._last_code and
          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
        self._AddContext(EcmaContext.INDEX)
      else:
        self._AddContext(EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.END_BRACKET:
      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.START_BLOCK:
      # A brace with no preceding code token (first code token of the file)
      # matches none of the block introducers below, so it falls through to
      # the object literal branch instead of dereferencing None.
      last_code = self._last_code
      if last_code is not None and (
          last_code.type in (TokenType.END_PAREN,
                             TokenType.END_PARAMETERS) or
          last_code.IsKeyword('else') or
          last_code.IsKeyword('do') or
          last_code.IsKeyword('try') or
          last_code.IsKeyword('finally') or
          (last_code.IsOperator(':') and
           last_code.metadata.context.type == EcmaContext.CASE_BLOCK)):
        # else, do, try, and finally all might have no () before {.
        # Also, handle the bizarre syntax case 10: {...}.
        self._AddContext(EcmaContext.BLOCK)
      else:
        self._AddContext(EcmaContext.OBJECT_LITERAL)

    elif token_type == TokenType.END_BLOCK:
      context = self._PopContextType(EcmaContext.BLOCK,
                                     EcmaContext.OBJECT_LITERAL)
      if self._context.type == EcmaContext.SWITCH:
        # The end of the block also means the end of the switch statement it
        # applies to.
        return self._PopContext()
      return context

    elif token.IsKeyword('switch'):
      self._AddContext(EcmaContext.SWITCH)

    elif (token_type == TokenType.KEYWORD and
          token.string in ('case', 'default') and
          self._context.type != EcmaContext.OBJECT_LITERAL):
      # Pop up to but not including the switch block.
      while self._context.parent.type != EcmaContext.SWITCH:
        self._PopContext()
        if self._context.parent is None:
          raise ParseError(token, 'Encountered case/default statement '
                           'without switch statement')

    elif token.IsOperator('?'):
      self._AddContext(EcmaContext.TERNARY_TRUE)

    elif token.IsOperator(':'):
      if self._context.type == EcmaContext.OBJECT_LITERAL:
        self._AddContext(EcmaContext.LITERAL_ELEMENT)

      elif self._context.type == EcmaContext.TERNARY_TRUE:
        self._PopContext()
        self._AddContext(EcmaContext.TERNARY_FALSE)

      # Handle nested ternary statements like:
      # foo = bar ? baz ? 1 : 2 : 3
      # When we encounter the second ":" the context is
      # ternary_false > ternary_true > statement > root
      elif (self._context.type == EcmaContext.TERNARY_FALSE and
            self._context.parent.type == EcmaContext.TERNARY_TRUE):
        self._PopContext()  # Leave current ternary false context.
        self._PopContext()  # Leave current parent ternary true
        self._AddContext(EcmaContext.TERNARY_FALSE)

      elif self._context.parent.type == EcmaContext.SWITCH:
        self._AddContext(EcmaContext.CASE_BLOCK)

    elif token.IsKeyword('var'):
      self._AddContext(EcmaContext.VAR)

    elif token.IsOperator(','):
      while self._context.type not in (EcmaContext.VAR,
                                       EcmaContext.ARRAY_LITERAL,
                                       EcmaContext.OBJECT_LITERAL,
                                       EcmaContext.STATEMENT,
                                       EcmaContext.PARAMETERS,
                                       EcmaContext.GROUP):
        self._PopContext()

    elif token_type == TokenType.SEMICOLON:
      self._EndStatement()

  def Process(self, first_token):
    """Processes the token stream starting with the given token.

    Args:
      first_token: The first token of the stream; following tokens are reached
          through each token's next attribute.
    """
    self._token = first_token
    while self._token:
      self._ProcessToken()

      if self._token.IsCode():
        self._last_code = self._token

      self._token = self._token.next

    try:
      # Close every context that is still open, up to and including the root.
      self._PopContextType(EcmaContext.ROOT)
    except ParseError:
      # Ignore the "popped to root" error.
      pass

  def _ProcessToken(self):
    """Process the given token."""
    token = self._token
    token.metadata = self._CreateMetaData()
    context = (self._ProcessContext() or self._context)
    token.metadata.context = context
    token.metadata.last_code = self._last_code

    # Determine the operator type of the token, if applicable.
    if token.type == TokenType.OPERATOR:
      token.metadata.operator_type = self._GetOperatorType(token)

    # Determine if there is an implied semicolon after the token.
    if token.type != TokenType.SEMICOLON:
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A statement like if (x) does not need a semicolon after it.  Compare
      # the context's type; the context object itself never equals the
      # type string.
      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
      is_last_code_in_line = token.IsCode() and (
          not next_code or next_code.line_number != token.line_number)
      is_continued_operator = (token.type == TokenType.OPERATOR and
                               not token.metadata.IsUnaryPostOperator())
      is_continued_dot = token.string == '.'
      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
      is_end_of_block = (
          token.type == TokenType.END_BLOCK and
          token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
      is_multiline_string = (token.type == TokenType.STRING_TEXT or
                             token.type == TokenType.TEMPLATE_STRING_START)
      is_continued_var_decl = (token.IsKeyword('var') and
                               next_code and
                               (next_code.type in [TokenType.IDENTIFIER,
                                                   TokenType.SIMPLE_LVALUE]) and
                               token.line_number < next_code.line_number)
      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
      if (is_last_code_in_line and
          self._StatementCouldEndInContext() and
          not is_multiline_string and
          not is_end_of_block and
          not is_continued_var_decl and
          not is_continued_operator and
          not is_continued_dot and
          not next_code_is_operator and
          not is_implied_block and
          not next_code_is_block):
        token.metadata.is_implied_semicolon = True
        self._EndStatement()

  def _StatementCouldEndInContext(self):
    """Returns if the current statement (if any) may end in this context."""
    # In the basic statement or variable declaration context, statement can
    # always end in this context.
    if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
      return True

    # End of a ternary false branch inside a statement can also be the
    # end of the statement, for example:
    # var x = foo ? foo.bar() : null
    # In this case the statement ends after the null, when the context stack
    # looks like ternary_false > var > statement > root.
    if (self._context.type == EcmaContext.TERNARY_FALSE and
        self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
      return True

    # In all other contexts like object and array literals, ternary true, etc.
    # the statement can't yet end.
    return False

  def _GetOperatorType(self, token):
    """Returns the operator type of the given operator token.

    Args:
      token: The token to get arity for.

    Returns:
      The type of the operator.  One of the *_OPERATOR constants defined in
      EcmaMetaData.
    """
    if token.string == '?':
      return EcmaMetaData.TERNARY_OPERATOR

    if token.string in TokenType.UNARY_OPERATORS:
      return EcmaMetaData.UNARY_OPERATOR

    # An operator with nothing before it, or directly after a closing brace,
    # is classified as unary.
    last_code = token.metadata.last_code
    if not last_code or last_code.type == TokenType.END_BLOCK:
      return EcmaMetaData.UNARY_OPERATOR

    if (token.string in TokenType.UNARY_POST_OPERATORS and
        last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
      return EcmaMetaData.UNARY_POST_OPERATOR

    if (token.string in TokenType.UNARY_OK_OPERATORS and
        last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
        last_code.string not in TokenType.UNARY_POST_OPERATORS):
      return EcmaMetaData.UNARY_OPERATOR

    return EcmaMetaData.BINARY_OPERATOR
OLD | NEW |