OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # | |
3 # Copyright 2007 The Closure Linter Authors. All Rights Reserved. | |
4 # | |
5 # Licensed under the Apache License, Version 2.0 (the "License"); | |
6 # you may not use this file except in compliance with the License. | |
7 # You may obtain a copy of the License at | |
8 # | |
9 # http://www.apache.org/licenses/LICENSE-2.0 | |
10 # | |
11 # Unless required by applicable law or agreed to in writing, software | |
12 # distributed under the License is distributed on an "AS-IS" BASIS, | |
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 # See the License for the specific language governing permissions and | |
15 # limitations under the License. | |
16 | |
17 """Token utility functions.""" | |
18 | |
19 __author__ = ('robbyw@google.com (Robert Walker)', | |
20 'ajp@google.com (Andy Perelson)') | |
21 | |
22 import copy | |
23 import StringIO | |
24 | |
25 from closure_linter.common import tokens | |
26 from closure_linter.javascripttokens import JavaScriptToken | |
27 from closure_linter.javascripttokens import JavaScriptTokenType | |
28 | |
# Shorthand: alias for the common token-type enumeration so the rest of this
# module can refer to types as Type.FOO.
Type = tokens.TokenType
31 | |
32 | |
def GetFirstTokenInSameLine(token):
  """Finds the token that begins the line containing the given token.

  Args:
    token: Any token in the line.

  Returns:
    The first token in the same line as token.
  """
  current = token
  while not current.IsFirstInLine():
    current = current.previous
  return current
45 | |
46 | |
def GetFirstTokenInPreviousLine(token):
  """Finds the token that begins the line before the given token's line.

  Args:
    token: Any token in the line.

  Returns:
    The first token in the previous line as token, or None if token is on the
    first line.
  """
  line_start = GetFirstTokenInSameLine(token)
  if not line_start.previous:
    # Already on the first line of the file.
    return None
  return GetFirstTokenInSameLine(line_start.previous)
62 | |
63 | |
def GetLastTokenInSameLine(token):
  """Finds the token that ends the line containing the given token.

  Args:
    token: Any token in the line.

  Returns:
    The last token in the same line as token.
  """
  current = token
  while not current.IsLastInLine():
    current = current.next
  return current
76 | |
77 | |
def GetAllTokensInSameLine(token):
  """Collects every token on the line containing the given token.

  Args:
    token: Any token in the line.

  Returns:
    All tokens on the same line as the given token, in stream order.
  """
  current = GetFirstTokenInSameLine(token)
  last_token = GetLastTokenInSameLine(token)

  line_tokens = [current]
  while current != last_token:
    current = current.next
    line_tokens.append(current)

  return line_tokens
97 | |
98 | |
def CustomSearch(start_token, func, end_func=None, distance=None,
                 reverse=False):
  """Returns the first token where func is True within distance of this token.

  Args:
    start_token: The token to start searching from
    func: The function to call to test a token for applicability
    end_func: The function to call to test a token to determine whether to
        abort the search.
    distance: The number of tokens to look through before failing search. Must
        be positive. If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the
        tokens after it

  Returns:
    The first token matching func within distance of this token, or None if no
    such token is found.
  """
  # Forward and reverse searches differ only in which link is followed, so a
  # single loop walks the list via the appropriate attribute.
  link_attr = 'previous' if reverse else 'next'

  token = start_token
  while token and (distance is None or distance > 0):
    candidate = getattr(token, link_attr)
    if candidate:
      if func(candidate):
        return candidate
      if end_func and end_func(candidate):
        # Hit an abort token before a match.
        return None

    token = candidate
    if distance is not None:
      distance -= 1

  return None
146 | |
147 | |
def Search(start_token, token_types, distance=None, reverse=False):
  """Finds the first token of any of the given types within distance.

  Args:
    start_token: The token to start searching from
    token_types: The allowable types of the token being searched for
    distance: The number of tokens to look through before failing search. Must
        be positive. If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the
        tokens after it

  Returns:
    The first token of any type in token_types within distance of this token,
    or None if no such token is found.
  """
  def _Matches(candidate):
    return candidate.IsAnyType(token_types)

  return CustomSearch(start_token, _Matches, None, distance, reverse)
166 | |
167 | |
def SearchExcept(start_token, token_types, distance=None, reverse=False):
  """Finds the first token NOT of any of the given types within distance.

  Args:
    start_token: The token to start searching from
    token_types: The unallowable types of the token being searched for
    distance: The number of tokens to look through before failing search. Must
        be positive. If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the
        tokens after it

  Returns:
    The first token of any type not in token_types within distance of this
    token, or None if no such token is found.
  """
  def _IsOtherType(candidate):
    return not candidate.IsAnyType(token_types)

  return CustomSearch(start_token, _IsOtherType, None, distance, reverse)
187 | |
188 | |
def SearchUntil(start_token, token_types, end_types, distance=None,
                reverse=False):
  """Finds the first token of a wanted type before any abort-type token.

  Args:
    start_token: The token to start searching from.
    token_types: The allowable types of the token being searched for.
    end_types: Types of tokens to abort search if we find.
    distance: The number of tokens to look through before failing search. Must
        be positive. If unspecified, will search until the end of the token
        chain
    reverse: When true, search the tokens before this one instead of the
        tokens after it

  Returns:
    The first token of any type in token_types within distance of this token
    before any tokens of type in end_type, or None if no such token is found.
  """
  def _Matches(candidate):
    return candidate.IsAnyType(token_types)

  def _ShouldAbort(candidate):
    return candidate.IsAnyType(end_types)

  return CustomSearch(start_token, _Matches, _ShouldAbort, distance, reverse)
210 | |
211 | |
def DeleteToken(token):
  """Deletes the given token from the linked list.

  The token's neighbors are relinked around it, but the token's own
  previous/next pointers and metadata are left untouched (see comment below).
  The token is flagged via is_deleted so that iteration code can skip it.

  Args:
    token: The token to delete
  """
  # When deleting a token, we do not update the deleted token itself to make
  # sure the previous and next pointers are still pointing to tokens which are
  # not deleted. Also it is very hard to keep track of all previously deleted
  # tokens to update them when their pointers become invalid. So we add this
  # flag that any token linked list iteration logic can skip deleted node safely
  # when its current token is deleted.
  token.is_deleted = True
  if token.previous:
    token.previous.next = token.next

  if token.next:
    token.next.previous = token.previous

  # Repair last_code back-references: any following token that recorded the
  # deleted token as its most recent code token must instead point at whatever
  # code token the deleted one recorded.  token.next itself was not rewired
  # above, so it still refers to the original successor.
  following_token = token.next
  while following_token and following_token.metadata.last_code == token:
    following_token.metadata.last_code = token.metadata.last_code
    following_token = following_token.next
235 | |
236 | |
def DeleteTokens(token, token_count):
  """Deletes the given number of tokens starting with the given token.

  Args:
    token: The token to start deleting at.
    token_count: The total number of tokens to delete.
  """
  # Delete the tokens *after* the start token first: each DeleteToken call
  # relinks the list, so token.next is always the next still-live token.
  # Use `range` (not the Python 2-only `xrange`) so this also runs on
  # Python 3; the loop variable is unused, hence `_`.
  for _ in range(1, token_count):
    DeleteToken(token.next)
  DeleteToken(token)
247 | |
248 | |
def InsertTokenBefore(new_token, token):
  """Insert new_token before token.

  Links new_token into the doubly-linked token stream immediately before
  token, copies token's metadata onto it, repairs last_code back-references if
  the new token is code, and fills in / shifts start_index values on the
  affected line.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.next = token
  new_token.previous = token.previous

  # The inserted token inherits a copy of its successor's metadata.
  new_token.metadata = copy.copy(token.metadata)

  if new_token.IsCode():
    # Every following token that considered old_last_code its most recent
    # code token must now consider the inserted token instead.
    old_last_code = token.metadata.last_code
    following_token = token
    while (following_token and
           following_token.metadata.last_code == old_last_code):
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  token.previous = new_token
  if new_token.previous:
    new_token.previous.next = new_token

  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      # Same line: the new token takes over token's old starting column.
      new_token.start_index = token.start_index
    else:
      # Different line: start right after the previous token, or at column 0
      # if there is no previous token.
      previous_token = new_token.previous
      if previous_token:
        new_token.start_index = (previous_token.start_index +
                                 len(previous_token.string))
      else:
        new_token.start_index = 0

    # Shift every later token on the same line right by the inserted width.
    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next
288 | |
289 | |
def InsertTokenAfter(new_token, token):
  """Insert new_token after token.

  Links new_token into the doubly-linked token stream immediately after
  token, copies token's metadata onto it, repairs last_code back-references,
  and fills in / shifts start_index values on the affected line.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.previous = token
  new_token.next = token.next

  # The inserted token inherits a copy of its predecessor's metadata.
  new_token.metadata = copy.copy(token.metadata)

  if token.IsCode():
    # The predecessor is now the most recent code token for the new token.
    new_token.metadata.last_code = token

  if new_token.IsCode():
    # Every following token that considered `token` its most recent code
    # token must now consider the inserted token instead.
    following_token = token.next
    while following_token and following_token.metadata.last_code == token:
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  token.next = new_token
  if new_token.next:
    new_token.next.previous = new_token

  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      # Same line: the new token starts right after its predecessor.
      new_token.start_index = token.start_index + len(token.string)
    else:
      # New line: the token starts the line at column 0.
      new_token.start_index = 0

    # Shift every later token on the same line right by the inserted width.
    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next
325 | |
326 | |
def InsertTokensAfter(new_tokens, token):
  """Insert multiple tokens after token.

  Args:
    new_tokens: An array of tokens to be added to the stream
    token: A token already in the stream
  """
  # TODO(user): It would be nicer to have InsertTokenAfter defer to here
  # instead of vice-versa.
  insert_after = token
  for added_token in new_tokens:
    InsertTokenAfter(added_token, insert_after)
    # Chain each subsequent insertion after the token just added.
    insert_after = added_token
340 | |
341 | |
def InsertSpaceTokenAfter(token):
  """Inserts a space token after the given token.

  Args:
    token: The token to insert a space token after

  Returns:
    The single space token that was inserted.
  """
  space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
                                token.line_number)
  InsertTokenAfter(space_token, token)
  # The docstring has always promised the created token; actually return it
  # so callers can use the inserted token (previously the function returned
  # None). Returning a value is backward compatible for callers that ignored
  # the result.
  return space_token
354 | |
355 | |
def InsertBlankLineAfter(token):
  """Inserts a blank line after the given token.

  Args:
    token: The token to insert a blank line after

  Returns:
    The single blank-line token that was inserted.
  """
  blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
                                token.line_number + 1)
  InsertLineAfter(token, [blank_token])
  # The docstring promised a returned token (it previously even mislabeled it
  # a "space token" while the function returned None). Return the inserted
  # blank-line token; backward compatible for callers that ignored the result.
  return blank_token
368 | |
369 | |
def InsertLineAfter(token, new_tokens):
  """Inserts a new line consisting of new_tokens after the given token.

  Args:
    token: The token to insert after.
    new_tokens: The tokens that will make up the new line.
  """
  tail = token
  for line_token in new_tokens:
    InsertTokenAfter(line_token, tail)
    tail = line_token

  # Every token after the inserted line moves down by one line.
  shifted = new_tokens[-1].next
  while shifted:
    shifted.line_number += 1
    shifted = shifted.next
387 | |
388 | |
def SplitToken(token, position):
  """Splits the token into two tokens at position.

  Args:
    token: The token to split
    position: The position to split at. Will be the beginning of second token.

  Returns:
    The new second token.
  """
  # Truncate the original token and carry the remainder into a new token of
  # the same type on the same line.
  tail_string = token.string[position:]
  token.string = token.string[:position]

  tail_token = JavaScriptToken(tail_string, token.type, token.line,
                               token.line_number)
  InsertTokenAfter(tail_token, token)

  return tail_token
407 | |
408 | |
def Compare(token1, token2):
  """Compares two tokens and determines their relative order.

  Args:
    token1: The first token to compare.
    token2: The second token to compare.

  Returns:
    A negative integer, zero, or a positive integer as the first token is
    before, equal, or after the second in the token stream.
  """
  # Order by line first; fall back to column position within the same line.
  line_delta = token1.line_number - token2.line_number
  if line_delta != 0:
    return line_delta
  return token1.start_index - token2.start_index
424 | |
425 | |
def GoogScopeOrNoneFromStartBlock(token):
  """Determines if the given START_BLOCK is part of a goog.scope statement.

  Args:
    token: A token of type START_BLOCK.

  Returns:
    The goog.scope function call token, or None if such call doesn't exist.
  """
  if token.type != JavaScriptTokenType.START_BLOCK:
    return None

  # Search for a goog.scope statement, which will be 5 tokens before the
  # block. Illustration of the tokens found prior to the start block:
  # goog.scope(function() {
  #       5    4    3 21 ^
  #
  # `range` (not the Python 2-only `xrange`) keeps this runnable on Python 3;
  # the loop variable itself is unused.
  maybe_goog_scope = token
  for _ in range(5):
    # Step back one token per iteration; once the chain runs out, stay None.
    maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and
                        maybe_goog_scope.previous else None)
  if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
    return maybe_goog_scope
449 | |
450 | |
def GetTokenRange(start_token, end_token):
  """Returns a list of tokens between the two given, inclusive.

  Args:
    start_token: Start token in the range.
    end_token: End token in the range.

  Returns:
    A list of tokens, in order, from start_token to end_token (including start
    and end). Returns none if the tokens do not describe a valid range.
  """
  collected = []
  current = start_token

  while current:
    collected.append(current)
    if current == end_token:
      return collected
    current = current.next

  # Walked off the end of the stream without meeting end_token: invalid range,
  # implicitly return None.
473 | |
474 | |
def TokensToString(token_iterable):
  """Convert a number of tokens into a string.

  Newlines will be inserted whenever the line_number of two neighboring
  strings differ.

  Args:
    token_iterable: The tokens to turn to a string.

  Returns:
    A string representation of the given tokens.
  """
  # Accumulate parts in a list and join once at the end. This replaces the
  # Python 2-only StringIO.StringIO buffer with an equivalent, Python 3
  # compatible idiom; the emitted characters are identical.
  token_list = list(token_iterable)
  if not token_list:
    return ''

  parts = []
  line_number = token_list[0].line_number

  for token in token_list:

    # Emit one newline for every line skipped moving forward.
    while line_number < token.line_number:
      line_number += 1
      parts.append('\n')

    # A token on an *earlier* line still starts a fresh line, but the line
    # counter resets to it rather than emitting multiple newlines.
    if line_number > token.line_number:
      line_number = token.line_number
      parts.append('\n')

    parts.append(token.string)

  return ''.join(parts)
508 | |
509 | |
def GetPreviousCodeToken(token):
  """Returns the code token before the specified token.

  Args:
    token: A token.

  Returns:
    The code token before the specified token or None if no such token
    exists.
  """
  def _IsCodeToken(candidate):
    # Keep the original truthiness guard on the candidate before inspecting
    # its type.
    return (candidate and
            candidate.type not in JavaScriptTokenType.NON_CODE_TYPES)

  return CustomSearch(token, _IsCodeToken, reverse=True)
525 | |
526 | |
def GetNextCodeToken(token):
  """Returns the next code token after the specified token.

  Args:
    token: A token.

  Returns:
    The next code token after the specified token or None if no such token
    exists.
  """
  def _IsCodeToken(candidate):
    # Keep the original truthiness guard on the candidate before inspecting
    # its type.
    return (candidate and
            candidate.type not in JavaScriptTokenType.NON_CODE_TYPES)

  return CustomSearch(token, _IsCodeToken, reverse=False)
542 | |
543 | |
def GetIdentifierStart(token):
  """Returns the first token in an identifier.

  Given a token which is part of an identifier, returns the token at the start
  of the identifier.

  Args:
    token: A token which is part of an identifier.

  Returns:
    The token at the start of the identifier or None if the identifier was not
    of the form 'a.b.c' (e.g. "['a']['b'].c").
  """
  start_token = token

  # Walk left across the contiguous run of identifier and '.' code tokens.
  prev_code = GetPreviousCodeToken(token)
  while prev_code and (prev_code.IsType(JavaScriptTokenType.IDENTIFIER) or
                       IsDot(prev_code)):
    start_token = prev_code
    prev_code = GetPreviousCodeToken(prev_code)

  # A well-formed dotted identifier cannot begin with a dot.
  if IsDot(start_token):
    return None

  return start_token
571 | |
572 | |
def GetIdentifierForToken(token):
  """Get the symbol specified by a token.

  Given a token, this function additionally concatenates any parts of an
  identifying symbol being identified that are split by whitespace or a
  newline.

  The function will return None if the token is not the first token of an
  identifier.

  Args:
    token: The first token of a symbol.

  Returns:
    The whole symbol, as a string.
  """

  # Search backward to determine if this token is the first token of the
  # identifier. If it is not the first token, return None to signal that this
  # token should be ignored.
  prev_token = token.previous
  while prev_token:
    if (prev_token.IsType(JavaScriptTokenType.IDENTIFIER) or
        IsDot(prev_token)):
      return None

    # Whitespace and comments between symbol parts are transparent; keep
    # scanning backward through them.
    if (prev_token.IsType(tokens.TokenType.WHITESPACE) or
        prev_token.IsAnyType(JavaScriptTokenType.COMMENT_TYPES)):
      prev_token = prev_token.previous
    else:
      break

  # A "function foo()" declaration.
  if token.type is JavaScriptTokenType.FUNCTION_NAME:
    return token.string

  # A "var foo" declaration (if the previous token is 'var')
  previous_code_token = GetPreviousCodeToken(token)

  if previous_code_token and previous_code_token.IsKeyword('var'):
    return token.string

  # Otherwise, this is potentially a namespaced (goog.foo.bar) identifier that
  # could span multiple lines or be broken up by whitespace.  We need
  # to concatenate.
  identifier_types = set([
      JavaScriptTokenType.IDENTIFIER,
      JavaScriptTokenType.SIMPLE_LVALUE
  ])

  assert token.type in identifier_types

  # Start with the first token
  symbol_tokens = [token]

  if token.next:
    # NOTE(review): iterating a token appears to walk forward through the
    # token stream starting at token.next (assumes Token implements __iter__
    # in the tokens module — confirm there).
    for t in token.next:
      last_symbol_token = symbol_tokens[-1]

      # A dot is part of the previous symbol.
      if IsDot(t):
        symbol_tokens.append(t)
        continue

      # An identifier is part of the previous symbol if the previous one was a
      # dot.
      if t.type in identifier_types:
        if IsDot(last_symbol_token):
          symbol_tokens.append(t)
          continue
        else:
          break

      # Skip any whitespace
      if t.type in JavaScriptTokenType.NON_CODE_TYPES:
        continue

      # This is the end of the identifier. Stop iterating.
      break

  # Join the collected pieces; a trailing dot is intentionally kept as-is.
  if symbol_tokens:
    return ''.join([t.string for t in symbol_tokens])
655 | |
656 | |
def GetStringAfterToken(token):
  """Get string after token.

  Args:
    token: Search will be done after this token.

  Returns:
    String if found after token else None (empty string will also
    return None).

  Search until end of string as in case of empty string Type.STRING_TEXT is not
  present/found and don't want to return next string.
  E.g.
  a = '';
  b = 'test';
  When searching for string after 'a' if search is not limited by end of string
  then it will return 'test' which is not desirable as there is a empty string
  before that.

  This will return None for cases where string is empty or no string found
  as in both cases there is no Type.STRING_TEXT.
  """
  # Abort at any string-closing token so an empty string never falls through
  # to the next string literal in the stream.
  closing_types = [JavaScriptTokenType.SINGLE_QUOTE_STRING_END,
                   JavaScriptTokenType.DOUBLE_QUOTE_STRING_END,
                   JavaScriptTokenType.TEMPLATE_STRING_END]
  string_token = SearchUntil(token, JavaScriptTokenType.STRING_TEXT,
                             closing_types)
  return string_token.string if string_token else None
687 | |
688 | |
def IsDot(token):
  """Whether the token represents a "dot" operator (foo.bar)."""
  return (token.string == '.' and
          token.type is JavaScriptTokenType.OPERATOR)
692 | |
693 | |
def IsIdentifierOrDot(token):
  """Whether the token is either an identifier or a '.'."""
  if IsDot(token):
    return True
  return token.type in (JavaScriptTokenType.IDENTIFIER,
                        JavaScriptTokenType.SIMPLE_LVALUE)
OLD | NEW |