Index: third_party/closure_linter/closure_linter/common/tokenizer.py
diff --git a/third_party/closure_linter/closure_linter/common/tokenizer.py b/third_party/closure_linter/closure_linter/common/tokenizer.py
deleted file mode 100755
index 9420ea3267a5a23ff76bf0632ffdfb464fd9b7a1..0000000000000000000000000000000000000000
--- a/third_party/closure_linter/closure_linter/common/tokenizer.py
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS-IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Regular expression based lexer."""
-
-__author__ = ('robbyw@google.com (Robert Walker)',
-              'ajp@google.com (Andy Perelson)')
-
-from closure_linter.common import tokens
-
-# Shorthand
-Type = tokens.TokenType
-
-
-class Tokenizer(object):
-  """General purpose tokenizer.
-
-  Attributes:
-    mode: The latest mode of the tokenizer. This allows patterns to distinguish
-      if they are mid-comment, mid-parameter list, etc.
-    matchers: Dictionary of modes to sequences of matchers that define the
-      patterns to check at any given time.
-    default_types: Dictionary of modes to types, defining what type to give
-      non-matched text when in the given mode. Defaults to Type.NORMAL.
-  """
-
-  def __init__(self, starting_mode, matchers, default_types):
-    """Initialize the tokenizer.
-
-    Args:
-      starting_mode: Mode to start in.
-      matchers: Dictionary of modes to sequences of matchers that defines the
-        patterns to check at any given time.
-      default_types: Dictionary of modes to types, defining what type to give
-        non-matched text when in the given mode. Defaults to Type.NORMAL.
-    """
-    self.__starting_mode = starting_mode
-    self.matchers = matchers
-    self.default_types = default_types
-
-  def TokenizeFile(self, file):
-    """Tokenizes the given file.
-
-    Args:
-      file: An iterable that yields one line of the file at a time.
-
-    Returns:
-      The first token in the file
-    """
-    # The current mode.
-    self.mode = self.__starting_mode
-    # The first token in the stream.
-    self.__first_token = None
-    # The last token added to the token stream.
-    self.__last_token = None
-    # The current line number.
-    self.__line_number = 0
-
-    for line in file:
-      self.__line_number += 1
-      self.__TokenizeLine(line)
-
-    return self.__first_token
-
-  def _CreateToken(self, string, token_type, line, line_number, values=None):
-    """Creates a new Token object (or subclass).
-
-    Args:
-      string: The string of input the token represents.
-      token_type: The type of token.
-      line: The text of the line this token is in.
-      line_number: The line number of the token.
-      values: A dict of named values within the token. For instance, a
-        function declaration may have a value called 'name' which captures the
-        name of the function.
-
-    Returns:
-      The newly created Token object.
-    """
-    return tokens.Token(string, token_type, line, line_number, values,
-                        line_number)
-
-  def __TokenizeLine(self, line):
-    """Tokenizes the given line.
-
-    Args:
-      line: The contents of the line.
-    """
-    string = line.rstrip('\n\r\f')
-    line_number = self.__line_number
-    self.__start_index = 0
-
-    if not string:
-      self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number))
-      return
-
-    normal_token = ''
-    index = 0
-    while index < len(string):
-      for matcher in self.matchers[self.mode]:
-        if matcher.line_start and index > 0:
-          continue
-
-        match = matcher.regex.match(string, index)
-
-        if match:
-          if normal_token:
-            self.__AddToken(
-                self.__CreateNormalToken(self.mode, normal_token, line,
-                                         line_number))
-            normal_token = ''
-
-          # Add the match.
-          self.__AddToken(self._CreateToken(match.group(), matcher.type, line,
-                                            line_number, match.groupdict()))
-
-          # Change the mode to the correct one for after this match.
-          self.mode = matcher.result_mode or self.mode
-
-          # Shorten the string to be matched.
-          index = match.end()
-
-          break
-
-      else:
-        # If the for loop finishes naturally (i.e. no matches) we just add the
-        # first character to the string of consecutive non match characters.
-        # These will constitute a NORMAL token.
-        if string:
-          normal_token += string[index:index + 1]
-          index += 1
-
-    if normal_token:
-      self.__AddToken(
-          self.__CreateNormalToken(self.mode, normal_token, line, line_number))

-  def __CreateNormalToken(self, mode, string, line, line_number):
-    """Creates a normal token.
-
-    Args:
-      mode: The current mode.
-      string: The string to tokenize.
-      line: The line of text.
-      line_number: The line number within the file.
-
-    Returns:
-      A Token object, of the default type for the current mode.
-    """
-    type = Type.NORMAL
-    if mode in self.default_types:
-      type = self.default_types[mode]
-    return self._CreateToken(string, type, line, line_number)
-
-  def __AddToken(self, token):
-    """Add the given token to the token stream.
-
-    Args:
-      token: The token to add.
-    """
-    # Store the first token, or point the previous token to this one.
-    if not self.__first_token:
-      self.__first_token = token
-    else:
-      self.__last_token.next = token
-
-    # Establish the doubly linked list
-    token.previous = self.__last_token
-    self.__last_token = token
-
-    # Compute the character indices
-    token.start_index = self.__start_index
-    self.__start_index += token.length
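For reviewers who want to see what this hunk removes, here is a minimal usage sketch of the deleted Tokenizer. It is illustrative only: the _Matcher helper class, the 'text' mode name, and the 'NUMBER'/'COMMENT' type labels are invented for this sketch (the Tokenizer only reads the regex, type, line_start and result_mode attributes of each matcher and treats the type as an opaque value). Tokenizer, tokens.Token and tokens.TokenType are the names actually used by the deleted code, so running this assumes a checkout in which closure_linter/common/tokenizer.py still exists.

import re

from closure_linter.common import tokenizer
from closure_linter.common import tokens

Type = tokens.TokenType


class _Matcher(object):
  """Hypothetical stand-in for the matcher objects the Tokenizer expects."""

  def __init__(self, regex, token_type, result_mode=None, line_start=False):
    self.regex = regex              # compiled pattern tried at the current index
    self.type = token_type          # type assigned to the matched text
    self.result_mode = result_mode  # mode to switch to after a match, or None
    self.line_start = line_start    # only try this pattern at column 0


# A single 'text' mode with two patterns; anything the patterns do not match
# is accumulated and emitted as a Type.NORMAL token via the default_types map.
matchers = {
    'text': [
        _Matcher(re.compile(r'\d+'), 'NUMBER'),
        _Matcher(re.compile(r'#.*'), 'COMMENT'),
    ],
}

t = tokenizer.Tokenizer(starting_mode='text',
                        matchers=matchers,
                        default_types={'text': Type.NORMAL})

# TokenizeFile accepts any iterable of lines and returns the head of a
# doubly linked list of tokens; empty lines become Type.BLANK_LINE tokens.
token = t.TokenizeFile(['width = 100  # pixels\n', '\n'])
while token:
  print('%d %s %r %d' % (token.line_number, token.type, token.string,
                         token.start_index))
  token = token.next

The design worth noting before the code goes away: TokenizeFile never builds a list or yields; __AddToken threads each token into a doubly linked list via previous/next and assigns start_index within its line by accumulating token lengths, which is what lets later linter passes walk forwards and backwards from any token. Subclasses can supply a richer token class by overriding _CreateToken, whose docstring above explicitly allows returning a Token subclass.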