Index: third_party/closure_linter/closure_linter/common/tokenizer.py
diff --git a/third_party/closure_linter/closure_linter/common/tokenizer.py b/third_party/closure_linter/closure_linter/common/tokenizer.py
deleted file mode 100755
index 9420ea3267a5a23ff76bf0632ffdfb464fd9b7a1..0000000000000000000000000000000000000000
--- a/third_party/closure_linter/closure_linter/common/tokenizer.py
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS-IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Regular expression based lexer."""
-
-__author__ = ('robbyw@google.com (Robert Walker)',
-              'ajp@google.com (Andy Perelson)')
-
-from closure_linter.common import tokens
-
-# Shorthand
-Type = tokens.TokenType
-
-
-class Tokenizer(object):
-  """General purpose tokenizer.
-
-  Attributes:
-    mode: The latest mode of the tokenizer. This allows patterns to distinguish
-      if they are mid-comment, mid-parameter list, etc.
-    matchers: Dictionary of modes to sequences of matchers that define the
-      patterns to check at any given time.
-    default_types: Dictionary of modes to types, defining what type to give
-      non-matched text when in the given mode. Defaults to Type.NORMAL.
-  """
-
-  def __init__(self, starting_mode, matchers, default_types):
-    """Initialize the tokenizer.
-
-    Args:
-      starting_mode: Mode to start in.
-      matchers: Dictionary of modes to sequences of matchers that defines the
-        patterns to check at any given time.
-      default_types: Dictionary of modes to types, defining what type to give
-        non-matched text when in the given mode. Defaults to Type.NORMAL.
-    """
-    self.__starting_mode = starting_mode
-    self.matchers = matchers
-    self.default_types = default_types
-
-  def TokenizeFile(self, file):
-    """Tokenizes the given file.
-
-    Args:
-      file: An iterable that yields one line of the file at a time.
-
-    Returns:
-      The first token in the file
-    """
-    # The current mode.
-    self.mode = self.__starting_mode
-    # The first token in the stream.
-    self.__first_token = None
-    # The last token added to the token stream.
-    self.__last_token = None
-    # The current line number.
-    self.__line_number = 0
-
-    for line in file:
-      self.__line_number += 1
-      self.__TokenizeLine(line)
-
-    return self.__first_token
-
-  def _CreateToken(self, string, token_type, line, line_number, values=None):
-    """Creates a new Token object (or subclass).
-
-    Args:
-      string: The string of input the token represents.
-      token_type: The type of token.
-      line: The text of the line this token is in.
-      line_number: The line number of the token.
-      values: A dict of named values within the token. For instance, a
-        function declaration may have a value called 'name' which captures the
-        name of the function.
-
-    Returns:
-      The newly created Token object.
-    """
-    return tokens.Token(string, token_type, line, line_number, values,
-                        line_number)
-
-  def __TokenizeLine(self, line):
-    """Tokenizes the given line.
-
-    Args:
-      line: The contents of the line.
-    """
-    string = line.rstrip('\n\r\f')
-    line_number = self.__line_number
-    self.__start_index = 0
-
-    if not string:
-      self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number))
-      return
-
-    normal_token = ''
-    index = 0
-    while index < len(string):
-      for matcher in self.matchers[self.mode]:
-        if matcher.line_start and index > 0:
-          continue
-
-        match = matcher.regex.match(string, index)
-
-        if match:
-          if normal_token:
-            self.__AddToken(
-                self.__CreateNormalToken(self.mode, normal_token, line,
-                                         line_number))
-            normal_token = ''
-
-          # Add the match.
-          self.__AddToken(self._CreateToken(match.group(), matcher.type, line,
-                                            line_number, match.groupdict()))
-
-          # Change the mode to the correct one for after this match.
-          self.mode = matcher.result_mode or self.mode
-
-          # Shorten the string to be matched.
-          index = match.end()
-
-          break
-
-      else:
-        # If the for loop finishes naturally (i.e. no matches) we just add the
-        # first character to the string of consecutive non match characters.
-        # These will constitute a NORMAL token.
-        if string:
-          normal_token += string[index:index + 1]
-          index += 1
-
-    if normal_token:
-      self.__AddToken(
-          self.__CreateNormalToken(self.mode, normal_token, line, line_number))

-  def __CreateNormalToken(self, mode, string, line, line_number):
-    """Creates a normal token.
-
-    Args:
-      mode: The current mode.
-      string: The string to tokenize.
-      line: The line of text.
-      line_number: The line number within the file.
-
-    Returns:
-      A Token object, of the default type for the current mode.
-    """
-    type = Type.NORMAL
-    if mode in self.default_types:
-      type = self.default_types[mode]
-    return self._CreateToken(string, type, line, line_number)
-
-  def __AddToken(self, token):
-    """Add the given token to the token stream.
-
-    Args:
-      token: The token to add.
-    """
-    # Store the first token, or point the previous token to this one.
-    if not self.__first_token:
-      self.__first_token = token
-    else:
-      self.__last_token.next = token
-
-    # Establish the doubly linked list
-    token.previous = self.__last_token
-    self.__last_token = token
-
-    # Compute the character indices
-    token.start_index = self.__start_index
-    self.__start_index += token.length
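For reviewers who want to see what this hunk removes, here is a minimal usage sketch of the deleted Tokenizer. It is illustrative only: the _Matcher helper class, the 'text' mode name, and the 'NUMBER'/'COMMENT' type labels are invented for this sketch (the Tokenizer only reads the regex, type, line_start and result_mode attributes of each matcher and treats the type as an opaque value). Tokenizer, tokens.Token and tokens.TokenType are the names actually used by the deleted code, so running this assumes a checkout in which closure_linter/common/tokenizer.py still exists.

import re

from closure_linter.common import tokenizer
from closure_linter.common import tokens

Type = tokens.TokenType


class _Matcher(object):
  """Hypothetical stand-in for the matcher objects the Tokenizer expects."""

  def __init__(self, regex, token_type, result_mode=None, line_start=False):
    self.regex = regex              # compiled pattern tried at the current index
    self.type = token_type          # type assigned to the matched text
    self.result_mode = result_mode  # mode to switch to after a match, or None
    self.line_start = line_start    # only try this pattern at column 0


# A single 'text' mode with two patterns; anything the patterns do not match
# is accumulated and emitted as a Type.NORMAL token via the default_types map.
matchers = {
    'text': [
        _Matcher(re.compile(r'\d+'), 'NUMBER'),
        _Matcher(re.compile(r'#.*'), 'COMMENT'),
    ],
}

t = tokenizer.Tokenizer(starting_mode='text',
                        matchers=matchers,
                        default_types={'text': Type.NORMAL})

# TokenizeFile accepts any iterable of lines and returns the head of a
# doubly linked list of tokens; empty lines become Type.BLANK_LINE tokens.
token = t.TokenizeFile(['width = 100  # pixels\n', '\n'])
while token:
  print('%d %s %r %d' % (token.line_number, token.type, token.string,
                         token.start_index))
  token = token.next

The design worth noting before the code goes away: TokenizeFile never builds a list or yields; __AddToken threads each token into a doubly linked list via previous/next and assigns start_index within its line by accumulating token lengths, which is what lets later linter passes walk forwards and backwards from any token. Subclasses can supply a richer token class by overriding _CreateToken, whose docstring above explicitly allows returning a Token subclass.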