OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright 2010 The Closure Linter Authors. All Rights Reserved. | |
3 # | |
4 # Licensed under the Apache License, Version 2.0 (the "License"); | |
5 # you may not use this file except in compliance with the License. | |
6 # You may obtain a copy of the License at | |
7 # | |
8 # http://www.apache.org/licenses/LICENSE-2.0 | |
9 # | |
10 # Unless required by applicable law or agreed to in writing, software | |
11 # distributed under the License is distributed on an "AS-IS" BASIS, | |
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 # See the License for the specific language governing permissions and | |
14 # limitations under the License. | |
15 | |
16 """Methods for checking EcmaScript files for indentation issues.""" | |
17 | |
18 __author__ = ('robbyw@google.com (Robert Walker)') | |
19 | |
20 import gflags as flags | |
21 | |
22 from closure_linter import ecmametadatapass | |
23 from closure_linter import errors | |
24 from closure_linter import javascripttokens | |
25 from closure_linter import tokenutil | |
26 from closure_linter.common import error | |
27 from closure_linter.common import position | |
28 | |
29 | |
# Command-line flag: when set, CheckToken prints the indentation stack for
# every checked line (used while debugging the checker itself).
flags.DEFINE_boolean('debug_indentation', False,
                     'Whether to print debugging information for indentation.')


# Shorthand aliases for classes used throughout this module.
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
39 | |
40 | |
41 # The general approach: | |
42 # | |
43 # 1. Build a stack of tokens that can affect indentation. | |
44 # For each token, we determine if it is a block or continuation token. | |
45 # Some tokens need to be temporarily overwritten in case they are removed | |
46 # before the end of the line. | |
47 # Much of the work here is determining which tokens to keep on the stack | |
48 # at each point. Operators, for example, should be removed once their | |
49 # expression or line is gone, while parentheses must stay until the matching | |
50 # end parentheses is found. | |
51 # | |
52 # 2. Given that stack, determine the allowable indentations. | |
53 # Due to flexible indentation rules in JavaScript, there may be many | |
54 # allowable indentations for each stack. We follow the general | |
55 # "no false positives" approach of GJsLint and build the most permissive | |
56 # set possible. | |
57 | |
58 | |
class TokenInfo(object):
  """Stores information about a token.

  Attributes:
    token: The token
    is_block: Whether the token represents a block indentation.
    is_transient: Whether the token should be automatically removed without
      finding a matching end token.
    overridden_by: TokenInfo for a token that overrides the indentation that
      this token would require.
    is_permanent_override: Whether the override on this token should persist
      even after the overriding token is removed from the stack. For example:
      x([
        1],
      2);
      needs this to be set so the last line is not required to be a continuation
      indent.
    line_number: The effective line number of this token. Will either be the
      actual line number or the one before it in the case of a mis-wrapped
      operator.
  """

  def __init__(self, token, is_block=False):
    """Initializes a TokenInfo object.

    Args:
      token: The token
      is_block: Whether the token represents a block indentation.
    """
    self.token = token
    self.is_block = is_block
    self.line_number = token.line_number
    self.overridden_by = None
    self.is_permanent_override = False
    # Open parens and parameter lists stay on the stack until their matching
    # close token is seen; every other non-block token is transient.
    durable_types = (Type.START_PAREN, Type.START_PARAMETERS)
    self.is_transient = (not is_block) and token.type not in durable_types

  def __repr__(self):
    # Build the debug representation piecewise, then join once.
    pieces = ['\n %s' % self.token]
    if self.overridden_by:
      pieces.append(' OVERRIDDEN [by "%s"]' % self.overridden_by.token.string)
    pieces.append(' {is_block: %s, is_transient: %s}' % (
        self.is_block, self.is_transient))
    return ''.join(pieces)
104 | |
105 | |
class IndentationRules(object):
  """EcmaScript indentation rules.

  Can be used to find common indentation errors in JavaScript, ActionScript and
  other Ecma like scripting languages.
  """

  def __init__(self):
    """Initializes the IndentationRules checker."""
    # Stack of TokenInfo objects for tokens that currently affect indentation.
    self._stack = []

    # Map from line number to number of characters it is off in indentation.
    self._start_index_offset = {}

  def Finalize(self):
    """Verifies the indentation stack was fully unwound at end of input.

    Raises:
      Exception: If the internal stack is not empty, indicating an unmatched
        open token (a bug in the stack bookkeeping, not a user error).
    """
    if self._stack:
      old_stack = self._stack
      self._stack = []
      raise Exception('INTERNAL ERROR: indentation stack is not empty: %r' %
                      old_stack)

  def CheckToken(self, token, state):
    """Checks a token for indentation errors.

    Args:
      token: The current token under consideration
      state: Additional information about the current tree state

    Returns:
      A list of error arrays [error code, error string, error token, position],
      one per indentation problem found; the list is empty (never None) if
      indentation is correct.
    """

    token_type = token.type
    indentation_errors = []
    stack = self._stack
    is_first = self._IsFirstNonWhitespaceTokenInLine(token)

    # Add tokens that could decrease indentation before checking.
    if token_type == Type.END_PAREN:
      self._PopTo(Type.START_PAREN)

    elif token_type == Type.END_PARAMETERS:
      self._PopTo(Type.START_PARAMETERS)

    elif token_type == Type.END_BRACKET:
      self._PopTo(Type.START_BRACKET)

    elif token_type == Type.END_BLOCK:
      start_token = self._PopTo(Type.START_BLOCK)
      # Check for required goog.scope comment.
      if start_token:
        goog_scope = tokenutil.GoogScopeOrNoneFromStartBlock(start_token.token)
        if goog_scope is not None:
          if not token.line.endswith('; // goog.scope\n'):
            if (token.line.find('//') > -1 and
                token.line.find('goog.scope') >
                token.line.find('//')):
              # A goog.scope comment exists but is not in the exact form.
              indentation_errors.append([
                  errors.MALFORMED_END_OF_SCOPE_COMMENT,
                  ('Malformed end of goog.scope comment. Please use the '
                   'exact following syntax to close the scope:\n'
                   '}); // goog.scope'),
                  token,
                  Position(token.start_index, token.length)])
            else:
              indentation_errors.append([
                  errors.MISSING_END_OF_SCOPE_COMMENT,
                  ('Missing comment for end of goog.scope which opened at line '
                   '%d. End the scope with:\n'
                   '}); // goog.scope' %
                   (start_token.line_number)),
                  token,
                  Position(token.start_index, token.length)])

    elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
      # 'case'/'default' outdent to the switch's block level: pop the block
      # token and immediately re-add it.
      self._Add(self._PopTo(Type.START_BLOCK))

    elif token_type == Type.SEMICOLON:
      self._PopTransient()

    if (is_first and
        token_type not in (Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT)):
      if flags.FLAGS.debug_indentation:
        # NOTE: Python 2-style print statement; this module predates Python 3.
        print 'Line #%d: stack %r' % (token.line_number, stack)

      # Ignore lines that start in JsDoc since we don't check them properly yet.
      # TODO(robbyw): Support checking JsDoc indentation.
      # Ignore lines that start as multi-line strings since indentation is N/A.
      # Ignore lines that start with operators since we report that already.
      # Ignore lines with tabs since we report that already.
      expected = self._GetAllowableIndentations()
      actual = self._GetActualIndentation(token)

      # Special case comments describing else, case, and default. Allow them
      # to outdent to the parent block.
      if token_type in Type.COMMENT_TYPES:
        next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        if next_code and next_code.type == Type.END_BLOCK:
          next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
        if next_code and next_code.string in ('else', 'case', 'default'):
          # TODO(robbyw): This almost certainly introduces false negatives.
          expected |= self._AddToEach(expected, -2)

      # actual is -1 when the line contains tabs (already reported elsewhere).
      if actual >= 0 and actual not in expected:
        expected = sorted(expected)
        indentation_errors.append([
            errors.WRONG_INDENTATION,
            'Wrong indentation: expected any of {%s} but got %d' % (
                ', '.join('%d' % x for x in expected if x < 80), actual),
            token,
            Position(actual, expected[0])])
        # Remember how far off this line is so hard stops computed from tokens
        # on it can be adjusted in _GetAllowableIndentations.
        self._start_index_offset[token.line_number] = expected[0] - actual

    # Add tokens that could increase indentation.
    if token_type == Type.START_BRACKET:
      self._Add(TokenInfo(
          token=token,
          is_block=token.metadata.context.type == Context.ARRAY_LITERAL))

    elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
      self._Add(TokenInfo(token=token, is_block=True))

    elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
      self._Add(TokenInfo(token=token, is_block=False))

    elif token_type == Type.KEYWORD and token.string == 'return':
      self._Add(TokenInfo(token))

    elif not token.IsLastInLine() and (
        token.IsAssignment() or token.IsOperator('?')):
      self._Add(TokenInfo(token=token))

    # Handle implied block closes.
    if token.metadata.is_implied_block_close:
      self._PopToImpliedBlock()

    # Add some tokens only if they appear at the end of the line.
    is_last = self._IsLastCodeInLine(token)
    if is_last:
      next_code_token = tokenutil.GetNextCodeToken(token)
      # Increase required indentation if this is an overlong wrapped statement
      # ending in an operator.
      if token_type == Type.OPERATOR:
        if token.string == ':':
          if stack and stack[-1].token.string == '?':
            # When a ternary : is on a different line than its '?', it doesn't
            # add indentation.
            if token.line_number == stack[-1].token.line_number:
              self._Add(TokenInfo(token))
          elif token.metadata.context.type == Context.CASE_BLOCK:
            # Pop transient tokens from say, line continuations, e.g.,
            # case x.
            #     y:
            # Want to pop the transient 4 space continuation indent.
            self._PopTransient()
            # Starting the body of the case statement, which is a type of
            # block.
            self._Add(TokenInfo(token=token, is_block=True))
          elif token.metadata.context.type == Context.LITERAL_ELEMENT:
            # When in an object literal, acts as operator indicating line
            # continuations.
            self._Add(TokenInfo(token))
          else:
            # ':' might also be a statement label, no effect on indentation in
            # this case.
            pass

        elif token.string != ',':
          self._Add(TokenInfo(token))
        else:
          # The token is a comma.
          if token.metadata.context.type == Context.VAR:
            self._Add(TokenInfo(token))
          elif token.metadata.context.type != Context.PARAMETERS:
            self._PopTransient()
      # Increase required indentation if this is the end of a statement that's
      # continued with an operator on the next line (e.g. the '.').
      elif (next_code_token and next_code_token.type == Type.OPERATOR and
            not next_code_token.metadata.IsUnaryOperator()):
        self._Add(TokenInfo(token))
      elif token_type == Type.PARAMETERS and token.string.endswith(','):
        # Parameter lists.
        self._Add(TokenInfo(token))
      elif token.IsKeyword('var'):
        self._Add(TokenInfo(token))
      elif token.metadata.is_implied_semicolon:
        self._PopTransient()
    elif token.IsAssignment():
      self._Add(TokenInfo(token))

    return indentation_errors

  def _AddToEach(self, original, amount):
    """Returns a new set with the given amount added to each element.

    Args:
      original: The original set of numbers
      amount: The amount to add to each element

    Returns:
      A new set containing each element of the original set added to the amount.
    """
    return set([x + amount for x in original])

  # Token types after which indentation may align with the token's own
  # position (a "hard stop") rather than a fixed +2/+4 offset.
  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  # Token strings that likewise permit a hard-stop alignment.
  _HARD_STOP_STRINGS = ('return', '?')

  def _IsHardStop(self, token):
    """Determines if the given token can have a hard stop after it.

    Args:
      token: token to examine

    Returns:
      Whether the token can have a hard stop after it.

    Hard stops are indentations defined by the position of another token as in
    indentation lined up with return, (, [, and ?.
    """
    return (token.type in self._HARD_STOP_TYPES or
            token.string in self._HARD_STOP_STRINGS or
            token.IsAssignment())

  def _GetAllowableIndentations(self):
    """Computes the set of allowable indentations.

    Returns:
      The set of allowable indentations, given the current stack.
    """
    expected = set([0])
    hard_stops = set([])

    # Whether the tokens are still in the same continuation, meaning additional
    # indentation is optional. As an example:
    # x = 5 +
    #     6 +
    #     7;
    # The second '+' does not add any required indentation.
    in_same_continuation = False

    for token_info in self._stack:
      token = token_info.token

      # Handle normal additive indentation tokens.
      if not token_info.overridden_by and token.string != 'return':
        if token_info.is_block:
          # Blocks add a required +2; previous continuation state resets.
          expected = self._AddToEach(expected, 2)
          hard_stops = self._AddToEach(hard_stops, 2)
          in_same_continuation = False
        elif in_same_continuation:
          # Further continuations in the same statement make +4 optional
          # (union with the un-shifted set) rather than required.
          expected |= self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
        else:
          # First continuation token requires a +4 indent.
          expected = self._AddToEach(expected, 4)
          hard_stops |= self._AddToEach(hard_stops, 4)
          in_same_continuation = True

      # Handle hard stops after (, [, return, =, and ?
      if self._IsHardStop(token):
        override_is_hard_stop = (token_info.overridden_by and
                                 self._IsHardStop(
                                     token_info.overridden_by.token))
        if token.type == Type.START_PAREN and token.previous:
          # For someFunction(...) we allow to indent at the beginning of the
          # identifier +4
          prev = token.previous
          if (prev.type == Type.IDENTIFIER and
              prev.line_number == token.line_number):
            hard_stops.add(prev.start_index + 4)
        if not override_is_hard_stop:
          start_index = token.start_index
          # Adjust by any indentation error already reported on this line so
          # hard stops are computed from where the line *should* start.
          if token.line_number in self._start_index_offset:
            start_index += self._start_index_offset[token.line_number]
          if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
              not token_info.overridden_by):
            hard_stops.add(start_index + 1)

          elif token.string == 'return' and not token_info.overridden_by:
            # len('return ') == 7: align with the returned expression.
            hard_stops.add(start_index + 7)

          elif token.type == Type.START_BRACKET:
            hard_stops.add(start_index + 1)

          elif token.IsAssignment():
            hard_stops.add(start_index + len(token.string) + 1)

          elif token.IsOperator('?') and not token_info.overridden_by:
            hard_stops.add(start_index + 2)

    return (expected | hard_stops) or set([0])

  def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token. Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line
    token = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if token.type == Type.WHITESPACE:
      if token.string.find('\t') >= 0:
        return -1
      else:
        return len(token.string)
    elif token.type == Type.PARAMETERS:
      # Parameter tokens carry their own leading whitespace.
      return len(token.string) - len(token.string.lstrip())
    else:
      return 0

  def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the first non-whitespace token on its line.
    """
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
      return False
    if token.IsFirstInLine():
      return True
    # Otherwise the only thing before it may be a single leading whitespace
    # token.
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)

  def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
      return False
    start_token = token
    while True:
      token = token.next
      if not token or token.line_number != start_token.line_number:
        return True
      if token.type not in Type.NON_CODE_TYPES:
        return False

  def _AllFunctionPropertyAssignTokens(self, start_token, end_token):
    """Checks if tokens are (likely) a valid function property assignment.

    Args:
      start_token: Start of the token range.
      end_token: End of the token range.

    Returns:
      True if all tokens between start_token and end_token are legal tokens
      within a function declaration and assignment into a property.
    """
    for token in tokenutil.GetTokenRange(start_token, end_token):
      fn_decl_tokens = (Type.FUNCTION_DECLARATION,
                        Type.PARAMETERS,
                        Type.START_PARAMETERS,
                        Type.END_PARAMETERS,
                        Type.END_PAREN)
      if (token.type not in fn_decl_tokens and
          token.IsCode() and
          not tokenutil.IsIdentifierOrDot(token) and
          not token.IsAssignment() and
          not (token.type == Type.OPERATOR and token.string == ',')):
        return False
    return True

  def _Add(self, token_info):
    """Adds the given token info to the stack.

    Args:
      token_info: The token information to add.
    """
    if self._stack and self._stack[-1].token == token_info.token:
      # Don't add the same token twice.
      return

    if token_info.is_block or token_info.token.type == Type.START_PAREN:
      # goog.scope blocks override the indentation they would otherwise add.
      scope_token = tokenutil.GoogScopeOrNoneFromStartBlock(token_info.token)
      token_info.overridden_by = TokenInfo(scope_token) if scope_token else None

      if (token_info.token.type == Type.START_BLOCK and
          token_info.token.metadata.context.type == Context.BLOCK):
        # Handle function() {} assignments: their block contents get special
        # treatment and are allowed to just indent by two whitespace.
        # For example
        # long.long.name = function(
        #     a) {
        # In this case the { and the = are on different lines. But the
        # override should still apply for all previous stack tokens that are
        # part of an assignment of a block.

        has_assignment = any(x for x in self._stack if x.token.IsAssignment())
        if has_assignment:
          last_token = token_info.token.previous
          for stack_info in reversed(self._stack):
            if (last_token and
                not self._AllFunctionPropertyAssignTokens(stack_info.token,
                                                          last_token)):
              break
            stack_info.overridden_by = token_info
            stack_info.is_permanent_override = True
            last_token = stack_info.token

    index = len(self._stack) - 1
    while index >= 0:
      stack_info = self._stack[index]
      stack_token = stack_info.token

      if stack_info.line_number == token_info.line_number:
        # In general, tokens only override each other when they are on
        # the same line.
        stack_info.overridden_by = token_info
        if (token_info.token.type == Type.START_BLOCK and
            (stack_token.IsAssignment() or
             stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
          # Multi-line blocks have lasting overrides, as in:
          # callFn({
          #   a: 10
          # },
          # 30);
          # b/11450054. If a string is not closed properly then close_block
          # could be null.
          close_block = token_info.token.metadata.context.end_token
          stack_info.is_permanent_override = close_block and (
              close_block.line_number != token_info.token.line_number)
      else:
        break
      index -= 1

    self._stack.append(token_info)

  def _Pop(self):
    """Pops the top token from the stack.

    Returns:
      The popped token info.
    """
    token_info = self._stack.pop()
    if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
      # Remove any temporary overrides.
      self._RemoveOverrides(token_info)
    else:
      # For braces and brackets, which can be object and array literals, remove
      # overrides when the literal is closed on the same line.
      token_check = token_info.token
      same_type = token_check.type
      goal_type = None
      if token_info.token.type == Type.START_BRACKET:
        goal_type = Type.END_BRACKET
      else:
        goal_type = Type.END_BLOCK
      line_number = token_info.token.line_number
      count = 0
      # Walk the rest of the line balancing open/close tokens of this type;
      # count reaching zero means the literal closed on its own line.
      while token_check and token_check.line_number == line_number:
        if token_check.type == goal_type:
          count -= 1
          if not count:
            self._RemoveOverrides(token_info)
            break
        if token_check.type == same_type:
          count += 1
        token_check = token_check.next
    return token_info

  def _PopToImpliedBlock(self):
    """Pops the stack until an implied block token is found."""
    while not self._Pop().token.metadata.is_implied_block:
      pass

  def _PopTo(self, stop_type):
    """Pops the stack until a token of the given type is popped.

    Args:
      stop_type: The type of token to pop to.

    Returns:
      The token info of the given type that was popped.
    """
    last = None
    while True:
      last = self._Pop()
      if last.token.type == stop_type:
        break
    return last

  def _RemoveOverrides(self, token_info):
    """Marks any token that was overridden by this token as active again.

    Args:
      token_info: The token that is being removed from the stack.
    """
    for stack_token in self._stack:
      if (stack_token.overridden_by == token_info and
          not stack_token.is_permanent_override):
        stack_token.overridden_by = None

  def _PopTransient(self):
    """Pops all transient tokens - i.e. not blocks, literals, or parens."""
    while self._stack and self._stack[-1].is_transient:
      self._Pop()
OLD | NEW |