Index: third_party/closure_linter/closure_linter/common/tokenizer.py |
diff --git a/third_party/closure_linter/closure_linter/common/tokenizer.py b/third_party/closure_linter/closure_linter/common/tokenizer.py |
deleted file mode 100755 |
index 9420ea3267a5a23ff76bf0632ffdfb464fd9b7a1..0000000000000000000000000000000000000000 |
--- a/third_party/closure_linter/closure_linter/common/tokenizer.py |
+++ /dev/null |
@@ -1,185 +0,0 @@ |
-#!/usr/bin/env python |
-# |
-# Copyright 2007 The Closure Linter Authors. All Rights Reserved. |
-# |
-# Licensed under the Apache License, Version 2.0 (the "License"); |
-# you may not use this file except in compliance with the License. |
-# You may obtain a copy of the License at |
-# |
-# http://www.apache.org/licenses/LICENSE-2.0 |
-# |
-# Unless required by applicable law or agreed to in writing, software |
-# distributed under the License is distributed on an "AS-IS" BASIS, |
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-# See the License for the specific language governing permissions and |
-# limitations under the License. |
- |
-"""Regular expression based lexer.""" |
- |
-__author__ = ('robbyw@google.com (Robert Walker)', |
- 'ajp@google.com (Andy Perelson)') |
- |
-from closure_linter.common import tokens |
- |
-# Shorthand |
-Type = tokens.TokenType |
- |
- |
-class Tokenizer(object): |
- """General purpose tokenizer. |
- |
- Attributes: |
-    mode: The current mode of the tokenizer. This allows patterns to
-        distinguish whether they are mid-comment, mid-parameter list, etc.
- matchers: Dictionary of modes to sequences of matchers that define the |
- patterns to check at any given time. |
- default_types: Dictionary of modes to types, defining what type to give |
- non-matched text when in the given mode. Defaults to Type.NORMAL. |
- """ |
- |
- def __init__(self, starting_mode, matchers, default_types): |
- """Initialize the tokenizer. |
- |
- Args: |
- starting_mode: Mode to start in. |
-      matchers: Dictionary of modes to sequences of matchers that define the
-          patterns to check at any given time.
- default_types: Dictionary of modes to types, defining what type to give |
- non-matched text when in the given mode. Defaults to Type.NORMAL. |
- """ |
- self.__starting_mode = starting_mode |
- self.matchers = matchers |
- self.default_types = default_types |
- |
- def TokenizeFile(self, file): |
- """Tokenizes the given file. |
- |
- Args: |
- file: An iterable that yields one line of the file at a time. |
- |
- Returns: |
-      The first token in the file.
- """ |
- # The current mode. |
- self.mode = self.__starting_mode |
- # The first token in the stream. |
- self.__first_token = None |
- # The last token added to the token stream. |
- self.__last_token = None |
- # The current line number. |
- self.__line_number = 0 |
- |
- for line in file: |
- self.__line_number += 1 |
- self.__TokenizeLine(line) |
- |
- return self.__first_token |
- |
- def _CreateToken(self, string, token_type, line, line_number, values=None): |
- """Creates a new Token object (or subclass). |
- |
- Args: |
- string: The string of input the token represents. |
- token_type: The type of token. |
- line: The text of the line this token is in. |
- line_number: The line number of the token. |
- values: A dict of named values within the token. For instance, a |
- function declaration may have a value called 'name' which captures the |
- name of the function. |
- |
- Returns: |
- The newly created Token object. |
- """ |
- return tokens.Token(string, token_type, line, line_number, values, |
- line_number) |
- |
- def __TokenizeLine(self, line): |
- """Tokenizes the given line. |
- |
- Args: |
- line: The contents of the line. |
- """ |
- string = line.rstrip('\n\r\f') |
- line_number = self.__line_number |
- self.__start_index = 0 |
- |
- if not string: |
- self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number)) |
- return |
- |
- normal_token = '' |
- index = 0 |
- while index < len(string): |
- for matcher in self.matchers[self.mode]: |
- if matcher.line_start and index > 0: |
- continue |
- |
- match = matcher.regex.match(string, index) |
- |
- if match: |
- if normal_token: |
- self.__AddToken( |
- self.__CreateNormalToken(self.mode, normal_token, line, |
- line_number)) |
- normal_token = '' |
- |
- # Add the match. |
- self.__AddToken(self._CreateToken(match.group(), matcher.type, line, |
- line_number, match.groupdict())) |
- |
- # Change the mode to the correct one for after this match. |
- self.mode = matcher.result_mode or self.mode |
- |
-          # Advance the index past this match.
- index = match.end() |
- |
- break |
- |
- else: |
-        # If the for loop finishes without a break (i.e. no matcher matched),
-        # append the current character to the run of consecutive non-matching
-        # characters. These will constitute a NORMAL token.
- if string: |
- normal_token += string[index:index + 1] |
- index += 1 |
- |
- if normal_token: |
- self.__AddToken( |
- self.__CreateNormalToken(self.mode, normal_token, line, line_number)) |
- |
- def __CreateNormalToken(self, mode, string, line, line_number): |
- """Creates a normal token. |
- |
- Args: |
- mode: The current mode. |
- string: The string to tokenize. |
- line: The line of text. |
- line_number: The line number within the file. |
- |
- Returns: |
- A Token object, of the default type for the current mode. |
- """ |
- type = Type.NORMAL |
- if mode in self.default_types: |
- type = self.default_types[mode] |
- return self._CreateToken(string, type, line, line_number) |
- |
- def __AddToken(self, token): |
- """Add the given token to the token stream. |
- |
- Args: |
- token: The token to add. |
- """ |
- # Store the first token, or point the previous token to this one. |
- if not self.__first_token: |
- self.__first_token = token |
- else: |
- self.__last_token.next = token |
- |
- # Establish the doubly linked list |
- token.previous = self.__last_token |
- self.__last_token = token |
- |
- # Compute the character indices |
- token.start_index = self.__start_index |
- self.__start_index += token.length |
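
For reference while reviewing the removal, below is a minimal sketch of how this Tokenizer was typically driven. SimpleMatcher is a hypothetical stand-in that only mirrors the attribute contract the tokenizer reads (regex, type, line_start, result_mode); the real matcher implementation lived in closure_linter.common.matcher, and the TokenType constants come from closure_linter.common.tokens.

import re

from closure_linter.common import tokenizer
from closure_linter.common import tokens


class SimpleMatcher(object):
  """Hypothetical matcher exposing the attributes Tokenizer expects."""

  def __init__(self, regex, token_type, result_mode=None, line_start=False):
    self.regex = regex              # Compiled pattern tried at the current index.
    self.type = token_type          # TokenType assigned to the matched text.
    self.result_mode = result_mode  # Mode to switch to after a match, if any.
    self.line_start = line_start    # If True, only tried at column 0.


TEXT_MODE = 'text'

# One mode with one matcher: runs of whitespace become WHITESPACE tokens,
# and any unmatched text falls back to the mode's default type (NORMAL).
matchers = {
    TEXT_MODE: [SimpleMatcher(re.compile(r'\s+'), tokens.TokenType.WHITESPACE)],
}
default_types = {TEXT_MODE: tokens.TokenType.NORMAL}

t = tokenizer.Tokenizer(TEXT_MODE, matchers, default_types)

# TokenizeFile accepts any iterable of lines and returns the head of the
# doubly linked token list; walk it through the .next pointers.
token = t.TokenizeFile(['var x = 1;\n'])
while token:
  print('%s %r at index %d' % (token.type, token.string, token.start_index))
  token = token.next

Running this sketch would yield alternating NORMAL and WHITESPACE tokens ('var', ' ', 'x', ' ', '=', ' ', '1;'), with start_index tracking each token's column as computed in __AddToken.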