OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # | |
3 # Copyright 2007 The Closure Linter Authors. All Rights Reserved. | |
4 # | |
5 # Licensed under the Apache License, Version 2.0 (the "License"); | |
6 # you may not use this file except in compliance with the License. | |
7 # You may obtain a copy of the License at | |
8 # | |
9 # http://www.apache.org/licenses/LICENSE-2.0 | |
10 # | |
11 # Unless required by applicable law or agreed to in writing, software | |
12 # distributed under the License is distributed on an "AS-IS" BASIS, | |
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 # See the License for the specific language governing permissions and | |
15 # limitations under the License. | |
16 | |
17 """Light weight EcmaScript state tracker that reads tokens and tracks state.""" | |
18 | |
19 __author__ = ('robbyw@google.com (Robert Walker)', | |
20 'ajp@google.com (Andy Perelson)') | |
21 | |
22 import re | |
23 | |
24 from closure_linter import javascripttokenizer | |
25 from closure_linter import javascripttokens | |
26 from closure_linter import tokenutil | |
27 from closure_linter import typeannotation | |
28 | |
29 # Shorthand | |
30 Type = javascripttokens.JavaScriptTokenType | |
31 | |
32 | |
class DocFlag(object):
  """Generic doc flag object.

  Parses a single @flag in a JSDoc comment into its type, name and
  description parts, anchored to the tokens that carry them.

  Attributes:
    flag_type: param, return, define, type, etc. (flag name without '@').
    flag_token: The flag token.
    type_start_token: The first token specifying the flag type,
      including braces.
    type_end_token: The last token specifying the flag type,
      including braces.
    type: The type spec string.
    jstype: The type spec, a TypeAnnotation instance.
    name_token: The token specifying the flag name.
    name: The flag name.
    description_start_token: The first token in the description.
    description_end_token: The end token in the description.
    description: The description.
  """

  # Please keep these lists alphabetized.

  # The list of standard jsdoc tags recognized by this linter (tags the
  # Closure tools accept without a "nonstandard tag" complaint).
  STANDARD_DOC = frozenset([
      'author',
      'bug',
      'classTemplate',
      'consistentIdGenerator',
      'const',
      'constructor',
      'define',
      'deprecated',
      'dict',
      'enum',
      'export',
      'expose',
      'extends',
      'externs',
      'fileoverview',
      'idGenerator',
      'implements',
      'implicitCast',
      'interface',
      'lends',
      'license',
      'ngInject',  # This annotation is specific to AngularJS.
      'noalias',
      'nocollapse',
      'nocompile',
      'nosideeffects',
      'override',
      'owner',
      'package',
      'param',
      'polymerBehavior',  # This annotation is specific to Polymer.
      'preserve',
      'private',
      'protected',
      'public',
      'return',
      'see',
      'stableIdGenerator',
      'struct',
      'supported',
      'template',
      'this',
      'type',
      'typedef',
      'unrestricted',
  ])

  # Tags that are supported but handled as annotations rather than plain doc.
  ANNOTATION = frozenset(['preserveTry', 'suppress'])

  # Every tag the linter considers legal.
  LEGAL_DOC = STANDARD_DOC | ANNOTATION

  # Includes all Closure Compiler @suppress types.
  # Not all of these annotations are interpreted by Closure Linter.
  #
  # Specific cases:
  # - accessControls is supported by the compiler at the expression
  #   and method level to suppress warnings about private/protected
  #   access (method level applies to all references in the method).
  #   The linter mimics the compiler behavior.
  SUPPRESS_TYPES = frozenset([
      'accessControls',
      'ambiguousFunctionDecl',
      'checkDebuggerStatement',
      'checkRegExp',
      'checkStructDictInheritance',
      'checkTypes',
      'checkVars',
      'const',
      'constantProperty',
      'deprecated',
      'duplicate',
      'es5Strict',
      'externsValidation',
      'extraProvide',
      'extraRequire',
      'fileoverviewTags',
      'globalThis',
      'internetExplorerChecks',
      'invalidCasts',
      'missingProperties',
      'missingProvide',
      'missingRequire',
      'missingReturn',
      'nonStandardJsDocs',
      'reportUnknownTypes',
      'strictModuleDepCheck',
      'suspiciousCode',
      'tweakValidation',
      'typeInvalidation',
      'undefinedNames',
      'undefinedVars',
      'underscore',
      'unknownDefines',
      'unnecessaryCasts',
      'unusedPrivateMembers',
      'uselessCode',
      'visibility',
      'with',
  ])

  # Docflags that may be followed by a free-form description.
  HAS_DESCRIPTION = frozenset([
      'define',
      'deprecated',
      'desc',
      'fileoverview',
      'license',
      'param',
      'preserve',
      'return',
      'supported',
  ])

  # Docflags whose argument should be parsed using the typeannotation parser.
  HAS_TYPE = frozenset([
      'const',
      'define',
      'enum',
      'export',
      'extends',
      'final',
      'implements',
      'mods',
      'package',
      'param',
      'private',
      'protected',
      'public',
      'return',
      'suppress',
      'type',
      'typedef',
  ])

  # Docflags for which it's ok to omit the type (flag without an argument).
  CAN_OMIT_TYPE = frozenset([
      'const',
      'enum',
      'export',
      'final',
      'package',
      'private',
      'protected',
      'public',
      'suppress',  # We'll raise a separate INCORRECT_SUPPRESS_SYNTAX instead.
  ])

  # Docflags that only take a type as an argument and should not parse a
  # following description.
  TYPE_ONLY = frozenset([
      'const',
      'enum',
      'extends',
      'implements',
      'package',
      'suppress',
      'type',
  ])

  # Docflags that are followed by a named value (currently only @param).
  HAS_NAME = frozenset(['param'])

  # A comment line containing only whitespace and/or a '*' doc prefix.
  EMPTY_COMMENT_LINE = re.compile(r'^\s*\*?\s*$')
  # A string that is entirely whitespace.
  EMPTY_STRING = re.compile(r'^\s*$')

  def __init__(self, flag_token, error_handler=None):
    """Creates the DocFlag object and attaches it to the given start token.

    Args:
      flag_token: The starting token of the flag.
      error_handler: An optional error handler for errors occurring while
        parsing the doctype.
    """
    self.flag_token = flag_token
    self.flag_type = flag_token.string.strip().lstrip('@')

    # Extract type, if applicable.
    self.type = None
    self.jstype = None
    self.type_start_token = None
    self.type_end_token = None
    if self.flag_type in self.HAS_TYPE:
      # Look for a '{' before the flag ends; its matching '}' bounds the type.
      brace = tokenutil.SearchUntil(flag_token, [Type.DOC_START_BRACE],
                                    Type.FLAG_ENDING_TYPES)
      if brace:
        end_token, contents = _GetMatchingEndBraceAndContents(brace)
        self.type = contents
        self.jstype = typeannotation.Parse(brace, end_token,
                                           error_handler)
        self.type_start_token = brace
        self.type_end_token = end_token
      elif (self.flag_type in self.TYPE_ONLY and
            flag_token.next.type not in Type.FLAG_ENDING_TYPES and
            flag_token.line_number == flag_token.next.line_number):
        # b/10407058. If the flag is expected to be followed by a type then
        # search for type in same line only. If no token after flag in same
        # line then conclude that no type is specified.
        self.type_start_token = flag_token.next
        self.type_end_token, self.type = _GetEndTokenAndContents(
            self.type_start_token)
        if self.type is not None:
          self.type = self.type.strip()
        self.jstype = typeannotation.Parse(flag_token, self.type_end_token,
                                           error_handler)

    # Extract name, if applicable.
    self.name_token = None
    self.name = None
    if self.flag_type in self.HAS_NAME:
      # Handle bad case, name could be immediately after flag token.
      self.name_token = _GetNextPartialIdentifierToken(flag_token)

      # Handle good case, if found token is after type start, look for
      # a identifier (substring to cover cases like [cnt] b/4197272) after
      # type end, since types contain identifiers.
      if (self.type and self.name_token and
          tokenutil.Compare(self.name_token, self.type_start_token) > 0):
        self.name_token = _GetNextPartialIdentifierToken(self.type_end_token)

      if self.name_token:
        self.name = self.name_token.string

    # Extract description, if applicable.
    self.description_start_token = None
    self.description_end_token = None
    self.description = None
    if self.flag_type in self.HAS_DESCRIPTION:
      # The description starts after whichever of type/name appears last.
      search_start_token = flag_token
      if self.name_token and self.type_end_token:
        if tokenutil.Compare(self.type_end_token, self.name_token) > 0:
          search_start_token = self.type_end_token
        else:
          search_start_token = self.name_token
      elif self.name_token:
        search_start_token = self.name_token
      elif self.type:
        search_start_token = self.type_end_token

      interesting_token = tokenutil.Search(search_start_token,
          Type.FLAG_DESCRIPTION_TYPES | Type.FLAG_ENDING_TYPES)
      if interesting_token.type in Type.FLAG_DESCRIPTION_TYPES:
        self.description_start_token = interesting_token
        self.description_end_token, self.description = (
            _GetEndTokenAndContents(interesting_token))

  def HasType(self):
    """Returns whether this flag should have a type annotation."""
    return self.flag_type in self.HAS_TYPE

  def __repr__(self):
    return '<Flag: %s, type:%s>' % (self.flag_type, repr(self.jstype))
305 | |
306 | |
class DocComment(object):
  """JavaScript doc comment object.

  Attributes:
    ordered_params: Ordered list of parameter names documented.
    start_token: The token that starts the doc comment.
    end_token: The token that ends the doc comment.
    suppressions: Map of suppression type to the token that added it.
    invalidated: Whether Invalidate() has been called.
  """

  def __init__(self, start_token):
    """Create the doc comment object.

    Args:
      start_token: The first token in the doc comment.
    """
    self.__flags = []
    self.start_token = start_token
    self.end_token = None
    self.suppressions = {}
    self.invalidated = False

  @property
  def ordered_params(self):
    """Gives the list of parameter names as a list of strings."""
    return [flag.name for flag in self.__flags
            if flag.flag_type == 'param' and flag.name]

  def Invalidate(self):
    """Indicate that the JSDoc is well-formed but we had problems parsing it.

    This is a short-circuiting mechanism so that we don't emit false
    positives about well-formed doc comments just because we don't support
    hot new syntaxes.
    """
    self.invalidated = True

  def IsInvalidated(self):
    """Test whether Invalidate() has been called."""
    return self.invalidated

  def AddSuppression(self, token):
    """Add a new error suppression flag.

    Args:
      token: The suppression flag token.
    """
    flag = token and token.attached_object
    if flag and flag.jstype:
      # One @suppress may name several types; record the token for each.
      for suppression in flag.jstype.IterIdentifiers():
        self.suppressions[suppression] = token

  def SuppressionOnly(self):
    """Returns whether this comment contains only suppression flags."""
    # A comment with no flags at all is not suppression-only.
    if not self.__flags:
      return False

    for flag in self.__flags:
      if flag.flag_type != 'suppress':
        return False

    return True

  def AddFlag(self, flag):
    """Add a new document flag.

    Args:
      flag: DocFlag object.
    """
    self.__flags.append(flag)

  def InheritsDocumentation(self):
    """Test if the jsdoc implies documentation inheritance.

    Returns:
      True if documentation may be pulled off the superclass.
    """
    return self.HasFlag('inheritDoc') or self.HasFlag('override')

  def HasFlag(self, flag_type):
    """Test if the given flag has been set.

    Args:
      flag_type: The type of the flag to check.

    Returns:
      True if the flag is set.
    """
    for flag in self.__flags:
      if flag.flag_type == flag_type:
        return True
    return False

  def GetFlag(self, flag_type):
    """Gets the last flag of the given type.

    Args:
      flag_type: The type of the flag to get.

    Returns:
      The last instance of the given flag type in this doc comment, or None
      if the flag is not present.
    """
    for flag in reversed(self.__flags):
      if flag.flag_type == flag_type:
        return flag
    return None

  def GetDocFlags(self):
    """Return the doc flags for this comment as a new list."""
    return list(self.__flags)

  def _YieldDescriptionTokens(self):
    """Yields the tokens that make up the comment's leading description."""
    for token in self.start_token:

      # Stop at the end of the comment or at the first @flag: only the
      # text before any flag counts as the description.
      if (token is self.end_token or
          token.type is javascripttokens.JavaScriptTokenType.DOC_FLAG or
          token.type not in javascripttokens.JavaScriptTokenType.COMMENT_TYPES):
        return

      # Skip the comment scaffolding (/** ... */ and leading '*') tokens.
      if token.type not in [
          javascripttokens.JavaScriptTokenType.START_DOC_COMMENT,
          javascripttokens.JavaScriptTokenType.END_DOC_COMMENT,
          javascripttokens.JavaScriptTokenType.DOC_PREFIX]:
        yield token

  @property
  def description(self):
    """The comment's description text (before any flags), as a string."""
    return tokenutil.TokensToString(
        self._YieldDescriptionTokens())

  def GetTargetIdentifier(self):
    """Returns the identifier (as a string) that this is a comment for.

    Note that this uses method uses GetIdentifierForToken to get the full
    identifier, even if broken up by whitespace, newlines, or comments,
    and thus could be longer than GetTargetToken().string.

    Returns:
      The identifier for the token this comment is for, or None if there is
      no target token.
    """
    token = self.GetTargetToken()
    if token:
      return tokenutil.GetIdentifierForToken(token)

  def GetTargetToken(self):
    """Get this comment's target token.

    Returns:
      The token that is the target of this comment, or None if there isn't one.
    """

    # File overviews describe the file, not a token.
    if self.HasFlag('fileoverview'):
      return

    skip_types = frozenset([
        Type.WHITESPACE,
        Type.BLANK_LINE,
        Type.START_PAREN])

    target_types = frozenset([
        Type.FUNCTION_NAME,
        Type.IDENTIFIER,
        Type.SIMPLE_LVALUE])

    token = self.end_token.next
    while token:
      if token.type in target_types:
        return token

      # Handles the case of a comment on "var foo = ..."
      if token.IsKeyword('var'):
        next_code_token = tokenutil.CustomSearch(
            token,
            lambda t: t.type not in Type.NON_CODE_TYPES)

        if (next_code_token and
            next_code_token.IsType(Type.SIMPLE_LVALUE)):
          return next_code_token

        return

      # Handles the case of a comment on "function foo () {}"
      if token.type is Type.FUNCTION_DECLARATION:
        next_code_token = tokenutil.CustomSearch(
            token,
            lambda t: t.type not in Type.NON_CODE_TYPES)

        if next_code_token.IsType(Type.FUNCTION_NAME):
          return next_code_token

        return

      # Skip types will end the search.
      if token.type not in skip_types:
        return

      token = token.next

  def CompareParameters(self, params):
    """Computes the edit distance and list from the function params to the docs.

    Uses the Levenshtein edit distance algorithm, with code modified from
    http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python

    Args:
      params: The parameter list for the function declaration.

    Returns:
      The edit distance, the edit list.
    """
    source_len, target_len = len(self.ordered_params), len(params)
    edit_lists = [[]]
    distance = [[]]
    for i in range(target_len+1):
      edit_lists[0].append(['I'] * i)
      distance[0].append(i)

    for j in range(1, source_len+1):
      edit_lists.append([['D'] * j])
      distance.append([j])

    for i in range(source_len):
      for j in range(target_len):
        cost = 1
        if self.ordered_params[i] == params[j]:
          cost = 0

        deletion = distance[i][j+1] + 1
        insertion = distance[i+1][j] + 1
        substitution = distance[i][j] + cost

        edit_list = None
        best = None
        if deletion <= insertion and deletion <= substitution:
          # Deletion is best.
          best = deletion
          edit_list = list(edit_lists[i][j+1])
          edit_list.append('D')

        elif insertion <= substitution:
          # Insertion is best.
          best = insertion
          edit_list = list(edit_lists[i+1][j])
          edit_list.append('I')
          edit_lists[i+1].append(edit_list)

        else:
          # Substitution is best.
          best = substitution
          edit_list = list(edit_lists[i][j])
          if cost:
            edit_list.append('S')
          else:
            edit_list.append('=')

        edit_lists[i+1].append(edit_list)
        distance[i+1].append(best)

    return distance[source_len][target_len], edit_lists[source_len][target_len]

  def __repr__(self):
    """Returns a string representation of this object.

    Returns:
      A string representation of this object.
    """
    return '<DocComment: %s, %s>' % (
        str(self.ordered_params), str(self.__flags))
577 | |
578 | |
579 # | |
580 # Helper methods used by DocFlag and DocComment to parse out flag information. | |
581 # | |
582 | |
583 | |
def _GetMatchingEndBraceAndContents(start_brace):
  """Returns the matching end brace and contents between the two braces.

  If any FLAG_ENDING_TYPE token is encountered before a matching end brace,
  then that token is used as the matching ending token. Contents will have all
  comment prefixes stripped out of them, and all comment prefixes in between
  the start and end tokens will be split out into separate DOC_PREFIX tokens.

  Args:
    start_brace: The DOC_START_BRACE token immediately before desired contents.

  Returns:
    The matching ending token (DOC_END_BRACE or FLAG_ENDING_TYPE) and a string
    of the contents between the matching tokens, minus any comment prefixes.
  """
  # The start brace itself counts as one unmatched open brace.
  depth = 1
  pieces = []

  # The start brace is not considered part of the type string.
  current = start_brace.next
  while depth:
    if current.type == Type.DOC_START_BRACE:
      depth += 1
    elif current.type == Type.DOC_END_BRACE:
      depth -= 1

    # Comment prefixes never contribute to the type text.
    if current.type != Type.DOC_PREFIX:
      pieces.append(current.string)

    # A flag-ending token terminates the scan even without a matching brace.
    if current.type in Type.FLAG_ENDING_TYPES:
      break
    current = current.next

  # Step back off the terminating token and drop its text from the contents
  # (the end brace / ending token is not part of the type).
  return current.previous, ''.join(pieces[:-1])
623 | |
624 | |
def _GetNextPartialIdentifierToken(start_token):
  """Returns the first token having identifier as substring after a token.

  Searches each token after the start to see if it contains an identifier.
  If found, token is returned. If no identifier is found returns None.
  Search is abandoned when a FLAG_ENDING_TYPE token is found.

  Args:
    start_token: The token to start searching after.

  Returns:
    The token found containing identifier, None otherwise.
  """
  current = start_token.next

  while current and current.type not in Type.FLAG_ENDING_TYPES:
    # Only COMMENT tokens qualify; the identifier may appear anywhere inside
    # the token's text (substring match).
    if (current.type == Type.COMMENT and
        javascripttokenizer.JavaScriptTokenizer.IDENTIFIER.search(
            current.string)):
      return current
    current = current.next

  return None
649 | |
650 | |
def _GetEndTokenAndContents(start_token):
  """Returns last content token and all contents before FLAG_ENDING_TYPE token.

  Comment prefixes are split into DOC_PREFIX tokens and stripped from the
  returned contents.

  Args:
    start_token: The token immediately before the first content token.

  Returns:
    The last content token and a string of all contents including start and
    end tokens, with comment prefixes stripped. Contents is None when only
    whitespace was collected.
  """
  iterator = start_token
  last_line = iterator.line_number
  last_token = None
  contents = ''
  # Nesting level of inline doc flags (e.g. "{@code ...}"); while > 0 a
  # flag-ending token does not terminate the scan (see note below).
  doc_depth = 0
  while not iterator.type in Type.FLAG_ENDING_TYPES or doc_depth > 0:
    if (iterator.IsFirstInLine() and
        DocFlag.EMPTY_COMMENT_LINE.match(iterator.line)):
      # If we have a blank comment line, consider that an implicit
      # ending of the description. This handles a case like:
      #
      #   * @return {boolean} True
      #   *
      #   * Note: This is a sentence.
      #
      # The note is not part of the @return description, but there was
      # no definitive ending token. Rather there was a line containing
      # only a doc comment prefix or whitespace.
      break

    # b/2983692
    # don't prematurely match against a @flag if inside a doc flag
    # need to think about what is the correct behavior for unterminated
    # inline doc flags
    if (iterator.type == Type.DOC_START_BRACE and
        iterator.next.type == Type.DOC_INLINE_FLAG):
      doc_depth += 1
    elif (iterator.type == Type.DOC_END_BRACE and
        doc_depth > 0):
      doc_depth -= 1

    if iterator.type in Type.FLAG_DESCRIPTION_TYPES:
      contents += iterator.string
      last_token = iterator

    iterator = iterator.next
    if iterator.line_number != last_line:
      # Preserve line structure: emit a newline each time the scan crosses
      # onto a new source line.
      contents += '\n'
      last_line = iterator.line_number

  end_token = last_token
  if DocFlag.EMPTY_STRING.match(contents):
    # Whitespace-only contents count as "no contents".
    contents = None
  else:
    # Strip trailing newline.
    contents = contents[:-1]

  return end_token, contents
712 | |
713 | |
class Function(object):
  """Data about a JavaScript function.

  Attributes:
    block_depth: Block depth the function began at.
    doc: The DocComment associated with the function.
    has_return: If the function has a return value.
    has_this: If the function references the 'this' object.
    is_assigned: If the function is part of an assignment.
    is_constructor: If the function is a constructor.
    name: The name of the function, whether given in the function keyword or
        as the lvalue the function is assigned to.
    start_token: First token of the function (the 'function' keyword token).
    end_token: Last token of the function (the closing '}' token).
    parameters: List of parameter names.
  """

  def __init__(self, block_depth, is_assigned, doc, name):
    # Identity and position information supplied by the caller.
    self.name = name
    self.doc = doc
    self.block_depth = block_depth
    self.is_assigned = is_assigned

    # Role flags derived from the doc comment (falsy when doc is absent).
    self.is_constructor = doc and doc.HasFlag('constructor')
    self.is_interface = doc and doc.HasFlag('interface')

    # State discovered later while scanning the function body.
    self.has_return = False
    self.has_throw = False
    self.has_this = False
    self.start_token = None
    self.end_token = None
    self.parameters = None
744 | |
745 | |
746 class StateTracker(object): | |
747 """EcmaScript state tracker. | |
748 | |
749 Tracks block depth, function names, etc. within an EcmaScript token stream. | |
750 """ | |
751 | |
752 OBJECT_LITERAL = 'o' | |
753 CODE = 'c' | |
754 | |
  def __init__(self, doc_flag=DocFlag):
    """Initializes a JavaScript token stream state tracker.

    Args:
      doc_flag: An optional custom DocFlag used for validating
          documentation flags.
    """
    # The DocFlag class (or subclass) used by DocFlagPass to parse @flags.
    self._doc_flag = doc_flag
    # All mutable tracking state is initialized in Reset() so the tracker
    # can be re-prepared for a new page.
    self.Reset()
764 | |
  def Reset(self):
    """Resets the state tracker to prepare for processing a new page."""
    # Nesting depths and block-close marker.
    self._block_depth = 0
    self._is_block_close = False
    self._paren_depth = 0
    # Stack of enclosing Function objects plus a name -> Function map
    # (the map is consulted by InInterfaceMethod).
    self._function_stack = []
    self._functions_by_name = {}
    # Comment / documentation state.
    self._last_comment = None
    self._doc_comment = None
    # Raw accumulated parameter text, consumed by GetParams().
    self._cumulative_params = None
    # One entry per open block: self.OBJECT_LITERAL or self.CODE.
    self._block_types = []
    # Token bookkeeping.
    self._last_non_space_token = None
    self._last_line = None
    self._first_token = None
    # Identifiers that already have a doc comment (see HasDocComment).
    self._documented_identifiers = set()
    self._variables_in_scope = []
781 | |
782 def DocFlagPass(self, start_token, error_handler): | |
783 """Parses doc flags. | |
784 | |
785 This pass needs to be executed before the aliaspass and we don't want to do | |
786 a full-blown statetracker dry run for these. | |
787 | |
788 Args: | |
789 start_token: The token at which to start iterating | |
790 error_handler: An error handler for error reporting. | |
791 """ | |
792 if not start_token: | |
793 return | |
794 doc_flag_types = (Type.DOC_FLAG, Type.DOC_INLINE_FLAG) | |
795 for token in start_token: | |
796 if token.type in doc_flag_types: | |
797 token.attached_object = self._doc_flag(token, error_handler) | |
798 | |
799 def InFunction(self): | |
800 """Returns true if the current token is within a function. | |
801 | |
802 Returns: | |
803 True if the current token is within a function. | |
804 """ | |
805 return bool(self._function_stack) | |
806 | |
807 def InConstructor(self): | |
808 """Returns true if the current token is within a constructor. | |
809 | |
810 Returns: | |
811 True if the current token is within a constructor. | |
812 """ | |
813 return self.InFunction() and self._function_stack[-1].is_constructor | |
814 | |
815 def InInterfaceMethod(self): | |
816 """Returns true if the current token is within an interface method. | |
817 | |
818 Returns: | |
819 True if the current token is within an interface method. | |
820 """ | |
821 if self.InFunction(): | |
822 if self._function_stack[-1].is_interface: | |
823 return True | |
824 else: | |
825 name = self._function_stack[-1].name | |
826 prototype_index = name.find('.prototype.') | |
827 if prototype_index != -1: | |
828 class_function_name = name[0:prototype_index] | |
829 if (class_function_name in self._functions_by_name and | |
830 self._functions_by_name[class_function_name].is_interface): | |
831 return True | |
832 | |
833 return False | |
834 | |
835 def InTopLevelFunction(self): | |
836 """Returns true if the current token is within a top level function. | |
837 | |
838 Returns: | |
839 True if the current token is within a top level function. | |
840 """ | |
841 return len(self._function_stack) == 1 and self.InTopLevel() | |
842 | |
843 def InAssignedFunction(self): | |
844 """Returns true if the current token is within a function variable. | |
845 | |
846 Returns: | |
847 True if if the current token is within a function variable | |
848 """ | |
849 return self.InFunction() and self._function_stack[-1].is_assigned | |
850 | |
851 def IsFunctionOpen(self): | |
852 """Returns true if the current token is a function block open. | |
853 | |
854 Returns: | |
855 True if the current token is a function block open. | |
856 """ | |
857 return (self._function_stack and | |
858 self._function_stack[-1].block_depth == self._block_depth - 1) | |
859 | |
860 def IsFunctionClose(self): | |
861 """Returns true if the current token is a function block close. | |
862 | |
863 Returns: | |
864 True if the current token is a function block close. | |
865 """ | |
866 return (self._function_stack and | |
867 self._function_stack[-1].block_depth == self._block_depth) | |
868 | |
869 def InBlock(self): | |
870 """Returns true if the current token is within a block. | |
871 | |
872 Returns: | |
873 True if the current token is within a block. | |
874 """ | |
875 return bool(self._block_depth) | |
876 | |
877 def IsBlockClose(self): | |
878 """Returns true if the current token is a block close. | |
879 | |
880 Returns: | |
881 True if the current token is a block close. | |
882 """ | |
883 return self._is_block_close | |
884 | |
885 def InObjectLiteral(self): | |
886 """Returns true if the current token is within an object literal. | |
887 | |
888 Returns: | |
889 True if the current token is within an object literal. | |
890 """ | |
891 return self._block_depth and self._block_types[-1] == self.OBJECT_LITERAL | |
892 | |
893 def InObjectLiteralDescendant(self): | |
894 """Returns true if the current token has an object literal ancestor. | |
895 | |
896 Returns: | |
897 True if the current token has an object literal ancestor. | |
898 """ | |
899 return self.OBJECT_LITERAL in self._block_types | |
900 | |
901 def InParentheses(self): | |
902 """Returns true if the current token is within parentheses. | |
903 | |
904 Returns: | |
905 True if the current token is within parentheses. | |
906 """ | |
907 return bool(self._paren_depth) | |
908 | |
909 def ParenthesesDepth(self): | |
910 """Returns the number of parens surrounding the token. | |
911 | |
912 Returns: | |
913 The number of parenthesis surrounding the token. | |
914 """ | |
915 return self._paren_depth | |
916 | |
917 def BlockDepth(self): | |
918 """Returns the number of blocks in which the token is nested. | |
919 | |
920 Returns: | |
921 The number of blocks in which the token is nested. | |
922 """ | |
923 return self._block_depth | |
924 | |
925 def FunctionDepth(self): | |
926 """Returns the number of functions in which the token is nested. | |
927 | |
928 Returns: | |
929 The number of functions in which the token is nested. | |
930 """ | |
931 return len(self._function_stack) | |
932 | |
  def InTopLevel(self):
    """Whether we are at the top level in the class.

    This function call is language specific. In some languages like
    JavaScript, a function is top level if it is not inside any parenthesis.
    In languages such as ActionScript, a function is top level if it is
    directly within a class.

    Raises:
      TypeError: Always; this is an abstract method that language-specific
          subclasses must override.
    """
    raise TypeError('Abstract method InTopLevel not implemented')
942 | |
  def GetBlockType(self, token):
    """Determine the block type given a START_BLOCK token.

    Code blocks come after parameters, keywords like else, and closing parens.

    Args:
      token: The current token. Can be assumed to be type START_BLOCK.

    Returns:
      Code block type for current token.

    Raises:
      TypeError: Always; this is an abstract method that language-specific
          subclasses must override.
    """
    raise TypeError('Abstract method GetBlockType not implemented')
954 | |
955 def GetParams(self): | |
956 """Returns the accumulated input params as an array. | |
957 | |
958 In some EcmasSript languages, input params are specified like | |
959 (param:Type, param2:Type2, ...) | |
960 in other they are specified just as | |
961 (param, param2) | |
962 We handle both formats for specifying parameters here and leave | |
963 it to the compilers for each language to detect compile errors. | |
964 This allows more code to be reused between lint checkers for various | |
965 EcmaScript languages. | |
966 | |
967 Returns: | |
968 The accumulated input params as an array. | |
969 """ | |
970 params = [] | |
971 if self._cumulative_params: | |
972 params = re.compile(r'\s+').sub('', self._cumulative_params).split(',') | |
973 # Strip out the type from parameters of the form name:Type. | |
974 params = map(lambda param: param.split(':')[0], params) | |
975 | |
976 return params | |
977 | |
978 def GetLastComment(self): | |
979 """Return the last plain comment that could be used as documentation. | |
980 | |
981 Returns: | |
982 The last plain comment that could be used as documentation. | |
983 """ | |
984 return self._last_comment | |
985 | |
986 def GetDocComment(self): | |
987 """Return the most recent applicable documentation comment. | |
988 | |
989 Returns: | |
990 The last applicable documentation comment. | |
991 """ | |
992 return self._doc_comment | |
993 | |
994 def HasDocComment(self, identifier): | |
995 """Returns whether the identifier has been documented yet. | |
996 | |
997 Args: | |
998 identifier: The identifier. | |
999 | |
1000 Returns: | |
1001 Whether the identifier has been documented yet. | |
1002 """ | |
1003 return identifier in self._documented_identifiers | |
1004 | |
1005 def InDocComment(self): | |
1006 """Returns whether the current token is in a doc comment. | |
1007 | |
1008 Returns: | |
1009 Whether the current token is in a doc comment. | |
1010 """ | |
1011 return self._doc_comment and self._doc_comment.end_token is None | |
1012 | |
1013 def GetDocFlag(self): | |
1014 """Returns the current documentation flags. | |
1015 | |
1016 Returns: | |
1017 The current documentation flags. | |
1018 """ | |
1019 return self._doc_flag | |
1020 | |
  def IsTypeToken(self, t):
    """Checks whether the given token is part of a doc flag's type annotation.

    Only meaningful while inside an open doc comment; structural doc tokens
    (comment start, flags, prefixes) are never considered type tokens.

    Args:
      t: The token to check.

    Returns:
      True if t lies strictly between the type_start_token and type_end_token
      of the nearest preceding doc flag in the current doc comment; False
      otherwise (including when not in a doc comment at all).
    """
    if self.InDocComment() and t.type not in (Type.START_DOC_COMMENT,
        Type.DOC_FLAG, Type.DOC_INLINE_FLAG, Type.DOC_PREFIX):
      # Search backwards for the owning doc flag, stopping at the start of
      # the doc comment so we never cross into a previous comment.
      f = tokenutil.SearchUntil(t, [Type.DOC_FLAG], [Type.START_DOC_COMMENT],
                                None, True)
      if (f and f.attached_object.type_start_token is not None and
          f.attached_object.type_end_token is not None):
        # Strict inequalities: the brace/delimiter tokens themselves do not
        # count as type tokens.
        return (tokenutil.Compare(t, f.attached_object.type_start_token) > 0 and
                tokenutil.Compare(t, f.attached_object.type_end_token) < 0)
    return False
1031 | |
1032 def GetFunction(self): | |
1033 """Return the function the current code block is a part of. | |
1034 | |
1035 Returns: | |
1036 The current Function object. | |
1037 """ | |
1038 if self._function_stack: | |
1039 return self._function_stack[-1] | |
1040 | |
1041 def GetBlockDepth(self): | |
1042 """Return the block depth. | |
1043 | |
1044 Returns: | |
1045 The current block depth. | |
1046 """ | |
1047 return self._block_depth | |
1048 | |
1049 def GetLastNonSpaceToken(self): | |
1050 """Return the last non whitespace token.""" | |
1051 return self._last_non_space_token | |
1052 | |
1053 def GetLastLine(self): | |
1054 """Return the last line.""" | |
1055 return self._last_line | |
1056 | |
1057 def GetFirstToken(self): | |
1058 """Return the very first token in the file.""" | |
1059 return self._first_token | |
1060 | |
1061 def IsVariableInScope(self, token_string): | |
1062 """Checks if string is variable in current scope. | |
1063 | |
1064 For given string it checks whether the string is a defined variable | |
1065 (including function param) in current state. | |
1066 | |
1067 E.g. if variables defined (variables in current scope) is docs | |
1068 then docs, docs.length etc will be considered as variable in current | |
1069 scope. This will help in avoding extra goog.require for variables. | |
1070 | |
1071 Args: | |
1072 token_string: String to check if its is a variable in current scope. | |
1073 | |
1074 Returns: | |
1075 true if given string is a variable in current scope. | |
1076 """ | |
1077 for variable in self._variables_in_scope: | |
1078 if (token_string == variable | |
1079 or token_string.startswith(variable + '.')): | |
1080 return True | |
1081 | |
1082 return False | |
1083 | |
  def HandleToken(self, token, last_non_space_token):
    """Handles the given token and updates state.

    Main dispatch of the tracker: updates block/paren depth, comment and doc
    comment tracking, function stack, parameter accumulation, and the set of
    in-scope variables, based on the token's type.

    Args:
      token: The token to handle.
      last_non_space_token: The preceding non-space token. Unused here; this
        method reads self._last_non_space_token instead.
    """
    self._is_block_close = False

    # Remember the very first token of the file.
    if not self._first_token:
      self._first_token = token

    # Track block depth.
    type = token.type
    if type == Type.START_BLOCK:
      self._block_depth += 1

      # Subclasses need to handle block start very differently because
      # whether a block is a CODE or OBJECT_LITERAL block varies significantly
      # by language.
      self._block_types.append(self.GetBlockType(token))

      # When entering a function body, record its parameters.
      if self.InFunction():
        function = self._function_stack[-1]
        if self._block_depth == function.block_depth + 1:
          function.parameters = self.GetParams()

    # Track block depth.
    elif type == Type.END_BLOCK:
      # Only a non-object-literal END_BLOCK counts as a code block close.
      self._is_block_close = not self.InObjectLiteral()
      self._block_depth -= 1
      self._block_types.pop()

    # Track parentheses depth.
    elif type == Type.START_PAREN:
      self._paren_depth += 1

    # Track parentheses depth.
    elif type == Type.END_PAREN:
      self._paren_depth -= 1

    elif type == Type.COMMENT:
      self._last_comment = token.string

    elif type == Type.START_DOC_COMMENT:
      # A doc comment supersedes any pending plain comment.
      self._last_comment = None
      self._doc_comment = DocComment(token)

    elif type == Type.END_DOC_COMMENT:
      self._doc_comment.end_token = token

    elif type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG):
      # Don't overwrite flags if they were already parsed in a previous pass.
      if token.attached_object is None:
        flag = self._doc_flag(token)
        token.attached_object = flag
      else:
        flag = token.attached_object
      self._doc_comment.AddFlag(flag)

      if flag.flag_type == 'suppress':
        self._doc_comment.AddSuppression(token)

    elif type == Type.FUNCTION_DECLARATION:
      last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None,
                                         True)
      doc = None
      # Only top-level functions are eligible for documentation.
      if self.InTopLevel():
        doc = self._doc_comment

      name = ''
      # A function is "assigned" when preceded by =, ||, && or (outside an
      # object literal) a colon.
      is_assigned = last_code and (last_code.IsOperator('=') or
          last_code.IsOperator('||') or last_code.IsOperator('&&') or
          (last_code.IsOperator(':') and not self.InObjectLiteral()))
      if is_assigned:
        # TODO(robbyw): This breaks for x[2] = ...
        # Must use loop to find full function name in the case of line-wrapped
        # declarations (bug 1220601) like:
        # my.function.foo.
        #     bar = function() ...
        identifier = tokenutil.Search(last_code, Type.SIMPLE_LVALUE, None, True)
        while identifier and tokenutil.IsIdentifierOrDot(identifier):
          name = identifier.string + name
          # Traverse behind us, skipping whitespace and comments.
          while True:
            identifier = identifier.previous
            if not identifier or not identifier.type in Type.NON_CODE_TYPES:
              break

      else:
        # Named function declaration: collect the FUNCTION_NAME tokens that
        # follow the keyword.
        next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
        while next_token and next_token.IsType(Type.FUNCTION_NAME):
          name += next_token.string
          next_token = tokenutil.Search(next_token, Type.FUNCTION_NAME, 2)

      function = Function(self._block_depth, is_assigned, doc, name)
      function.start_token = token

      self._function_stack.append(function)
      self._functions_by_name[name] = function

      # Add a delimiter in stack for scope variables to define start of
      # function. This helps in popping variables of this function when
      # function declaration ends.
      self._variables_in_scope.append('')

    elif type == Type.START_PARAMETERS:
      self._cumulative_params = ''

    elif type == Type.PARAMETERS:
      # Parameters may span several tokens; accumulate and re-derive scope.
      self._cumulative_params += token.string
      self._variables_in_scope.extend(self.GetParams())

    elif type == Type.KEYWORD and token.string == 'return':
      # A bare 'return;' does not count as returning a value.
      next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
      if not next_token.IsType(Type.SEMICOLON):
        function = self.GetFunction()
        if function:
          function.has_return = True

    elif type == Type.KEYWORD and token.string == 'throw':
      function = self.GetFunction()
      if function:
        function.has_throw = True

    elif type == Type.KEYWORD and token.string == 'var':
      function = self.GetFunction()
      next_token = tokenutil.Search(token, [Type.IDENTIFIER,
                                            Type.SIMPLE_LVALUE])

      if next_token:
        if next_token.type == Type.SIMPLE_LVALUE:
          self._variables_in_scope.append(next_token.values['identifier'])
        else:
          self._variables_in_scope.append(next_token.string)

    elif type == Type.SIMPLE_LVALUE:
      identifier = token.values['identifier']
      jsdoc = self.GetDocComment()
      if jsdoc:
        # An assignment preceded by a doc comment documents the identifier.
        self._documented_identifiers.add(identifier)

      self._HandleIdentifier(identifier, True)

    elif type == Type.IDENTIFIER:
      self._HandleIdentifier(token.string, False)

      # Detect documented non-assignments.
      next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
      if next_token and next_token.IsType(Type.SEMICOLON):
        if (self._last_non_space_token and
            self._last_non_space_token.IsType(Type.END_DOC_COMMENT)):
          self._documented_identifiers.add(token.string)
1239 | |
1240 def _HandleIdentifier(self, identifier, is_assignment): | |
1241 """Process the given identifier. | |
1242 | |
1243 Currently checks if it references 'this' and annotates the function | |
1244 accordingly. | |
1245 | |
1246 Args: | |
1247 identifier: The identifer to process. | |
1248 is_assignment: Whether the identifer is being written to. | |
1249 """ | |
1250 if identifier == 'this' or identifier.startswith('this.'): | |
1251 function = self.GetFunction() | |
1252 if function: | |
1253 function.has_this = True | |
1254 | |
  def HandleAfterToken(self, token):
    """Handle updating state after a token has been checked.

    This function should be used for destructive state changes such as
    deleting a tracked object. It also records the last non-space token and
    the last line for later queries.

    Args:
      token: The token to handle.
    """
    type = token.type
    if type == Type.SEMICOLON or type == Type.END_PAREN or (
        type == Type.END_BRACKET and
        self._last_non_space_token.type not in (
            Type.SINGLE_QUOTE_STRING_END, Type.DOUBLE_QUOTE_STRING_END,
            Type.TEMPLATE_STRING_END)):
      # We end on any numeric array index, but keep going for string based
      # array indices so that we pick up manually exported identifiers.
      self._doc_comment = None
      self._last_comment = None

    elif type == Type.END_BLOCK:
      # Closing a block invalidates any pending comments.
      self._doc_comment = None
      self._last_comment = None

      if self.InFunction() and self.IsFunctionClose():
        # TODO(robbyw): Detect the function's name for better errors.
        function = self._function_stack.pop()
        function.end_token = token

        # Pop all variables down to the '' delimiter: they were defined in
        # the function being closed, so they go out of scope now.
        while self._variables_in_scope and self._variables_in_scope[-1]:
          self._variables_in_scope.pop()

        # Pop the delimiter itself (pushed when the function was declared).
        if self._variables_in_scope:
          self._variables_in_scope.pop()

    elif type == Type.END_PARAMETERS and self._doc_comment:
      self._doc_comment = None
      self._last_comment = None

    # Record trailing state used by the next HandleToken pass.
    if not token.IsAnyType(Type.WHITESPACE, Type.BLANK_LINE):
      self._last_non_space_token = token

    self._last_line = token.line
OLD | NEW |