third_party/closure_linter/closure_linter/javascripttokenizer.py - Issue 2592193002: Remove closure_linter from Chrome

Unified Diff: third_party/closure_linter/closure_linter/javascripttokenizer.py

Issue 2592193002: Remove closure_linter from Chrome (Closed)

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/closure_linter/closure_linter/javascriptstatetracker_test.py ('k') | third_party/closure_linter/closure_linter/javascripttokens.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/closure_linter/closure_linter/javascripttokenizer.py

diff --git a/third_party/closure_linter/closure_linter/javascripttokenizer.py b/third_party/closure_linter/closure_linter/javascripttokenizer.py

deleted file mode 100755

index 964db7ce2fdcb4b17af1a8c565c1a08f7fb94fa6..0000000000000000000000000000000000000000

--- a/third_party/closure_linter/closure_linter/javascripttokenizer.py

+++ /dev/null

@@ -1,478 +0,0 @@

-#!/usr/bin/env python

-# Licensed under the Apache License, Version 2.0 (the "License");

-# you may not use this file except in compliance with the License.

-# You may obtain a copy of the License at

-# http://www.apache.org/licenses/LICENSE-2.0

-# Unless required by applicable law or agreed to in writing, software

-# distributed under the License is distributed on an "AS-IS" BASIS,

-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-# See the License for the specific language governing permissions and

-# limitations under the License.

-"""Regular expression based JavaScript parsing classes."""

-__author__ = ('robbyw@google.com (Robert Walker)',

- 'ajp@google.com (Andy Perelson)')

-import copy

-import re

-from closure_linter import javascripttokens

-from closure_linter.common import matcher

-from closure_linter.common import tokenizer

-# Shorthand

-Type = javascripttokens.JavaScriptTokenType

-Matcher = matcher.Matcher

-class JavaScriptModes(object):

- """Enumeration of the different matcher modes used for JavaScript."""

- TEXT_MODE = 'text'

- SINGLE_QUOTE_STRING_MODE = 'single_quote_string'

- DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'

- TEMPLATE_STRING_MODE = 'template_string'

- BLOCK_COMMENT_MODE = 'block_comment'

- DOC_COMMENT_MODE = 'doc_comment'

- DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'

- LINE_COMMENT_MODE = 'line_comment'

- PARAMETER_MODE = 'parameter'

- FUNCTION_MODE = 'function'

-class JavaScriptTokenizer(tokenizer.Tokenizer):

- """JavaScript tokenizer.

- Convert JavaScript code into an array of tokens.

- """

- # Useful patterns for JavaScript parsing.

- IDENTIFIER_CHAR = r'A-Za-z0-9_$'

- # Number patterns based on:

- # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html

- MANTISSA = r"""

- (\d+(?!\.)) | # Matches '10'

- (\d+\.(?!\d)) | # Matches '10.'

- (\d*\.\d+) # Matches '.5' or '10.5'

- """

- DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA

- HEX_LITERAL = r'0[xX][0-9a-fA-F]+'

- NUMBER = re.compile(r"""

- ((%s)|(%s))

- """ % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE)

- # Strings come in three parts - first we match the start of the string, then

- # the contents, then the end. The contents consist of any character except a

- # backslash or end of string, or a backslash followed by any character, or a

- # backslash followed by end of line to support correct parsing of multi-line

- # strings.

- SINGLE_QUOTE = re.compile(r"'")

- SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")

- DOUBLE_QUOTE = re.compile(r'"')

- DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')

- # Template strings are different from normal strings in that they do not

- # require escaping of end of lines in order to be multi-line.

- TEMPLATE_QUOTE = re.compile(r'`')

- TEMPLATE_QUOTE_TEXT = re.compile(r'([^`]|$)+')

- START_SINGLE_LINE_COMMENT = re.compile(r'//')

- END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')

- START_DOC_COMMENT = re.compile(r'/\*\*')

- START_BLOCK_COMMENT = re.compile(r'/\*')

- END_BLOCK_COMMENT = re.compile(r'\*/')

- BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+')

- # Comment text is anything that we are not going to parse into another special

- # token like (inline) flags or end comments. Complicated regex to match

- # most normal characters, and '*', '{', '}', and '@' when we are sure that

- # it is safe. Expression [^*{\s]@ must come first, or the other options will

- # match everything before @, and we won't match @'s that aren't part of flags

- # like in email addresses in the @author tag.

- DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')

- DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')

- # Match anything that is allowed in a type definition, except for tokens

- # needed to parse it (and the lookahead assertion for "*/").

- DOC_COMMENT_TYPE_TEXT = re.compile(r'([^*|!?=<>(){}:,\s]|\*(?!/))+')

- # Match the prefix ' * ' that starts every line of jsdoc. Want to include

- # spaces after the '*', but nothing else that occurs after a '*', and don't

- # want to match the '*' in '*/'.

- DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))')

- START_BLOCK = re.compile('{')

- END_BLOCK = re.compile('}')

- REGEX_CHARACTER_CLASS = r"""

- \[ # Opening bracket

- ([^\]\\]|\\.)* # Anything but a ] or \,

- # or a backslash followed by anything

- \] # Closing bracket

- """

- # We ensure the regex is followed by one of the tokens below to avoid

- # incorrectly parsing something like x / y / z as x REGEX(/ y /) z

- POST_REGEX_LIST = [

- ';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}']

- REGEX = re.compile(r"""

- / # opening slash

- (?!\*) # not the start of a comment

- (\\.|[^\[\/\\]|(%s))* # a backslash followed by anything,

- # or anything but a / or [ or \,

- # or a character class

- / # closing slash

- [gimsx]* # optional modifiers

- (?=\s*(%s))

- """ % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)),

- re.VERBOSE)

- ANYTHING = re.compile(r'.*')

- PARAMETERS = re.compile(r'[^\)]+')

- CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*')

- FUNCTION_DECLARATION = re.compile(r'\bfunction\b')

- OPENING_PAREN = re.compile(r'\(')

- CLOSING_PAREN = re.compile(r'\)')

- OPENING_BRACKET = re.compile(r'\[')

- CLOSING_BRACKET = re.compile(r'\]')

- # We omit these JS keywords from the list:

- # function - covered by FUNCTION_DECLARATION.

- # delete, in, instanceof, new, typeof - included as operators.

- # this - included in identifiers.

- # null, undefined - not included, should go in some "special constant" list.

- KEYWORD_LIST = [

- 'break',

- 'case',

- 'catch',

- 'continue',

- 'default',

- 'do',

- 'else',

- 'finally',

- 'for',

- 'if',

- 'return',

- 'switch',

- 'throw',

- 'try',

- 'var',

- 'while',

- 'with',

- ]

- # List of regular expressions to match as operators. Some notes: for our

- # purposes, the comma behaves similarly enough to a normal operator that we

- # include it here. r'\bin\b' actually matches 'in' surrounded by boundary

- # characters - this may not match some very esoteric uses of the in operator.

- # Operators that are subsets of larger operators must come later in this list

- # for proper matching, e.g., '>>' must come AFTER '>>>'.

- OPERATOR_LIST = [

- ',',

- r'\+\+',

- '===',

- '!==',

- '>>>=',

- '>>>',

- '==',

- '>=',

- '<=',

- '!=',

- '<<=',

- '>>=',

- '<<',

- '>>',

- '=>',

- '>',

- '<',

- r'\+=',

- r'\+',

- '--',

- r'\^=',

- '-=',

- '-',

- '/=',

- '/',

- r'\*=',

- r'\*',

- '%=',

- '%',

- '&&',

- r'\|\|',

- '&=',

- '&',

- r'\|=',

- r'\|',

- '=',

- '!',

- ':',

- r'\?',

- r'\^',

- r'\bdelete\b',

- r'\bin\b',

- r'\binstanceof\b',

- r'\bnew\b',

- r'\btypeof\b',

- r'\bvoid\b',

- r'\.',

- ]

- OPERATOR = re.compile('|'.join(OPERATOR_LIST))

- WHITESPACE = re.compile(r'\s+')

- SEMICOLON = re.compile(r';')

- # Technically JavaScript identifiers can't contain '.', but we treat a set of

- # nested identifiers as a single identifier, except for trailing dots.

- NESTED_IDENTIFIER = r'[a-zA-Z_$]([%s]|\.[a-zA-Z_$])*' % IDENTIFIER_CHAR

- IDENTIFIER = re.compile(NESTED_IDENTIFIER)

- SIMPLE_LVALUE = re.compile(r"""

- (?P<identifier>%s) # a valid identifier

- (?=\s* # optional whitespace

- \= # look ahead to equal sign

- (?!=)) # not follwed by equal

- """ % NESTED_IDENTIFIER, re.VERBOSE)

- # A doc flag is an @ sign followed by letters that appears at the beginning

- # of the line or after whitespace (a flag directly after a '{' is matched

- # separately by DOC_INLINE_FLAG). The look-behind check is necessary to not

- # match someone@google.com as a flag.

- DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')

- # To properly parse parameter names and complex doctypes containing

- # whitespace, we need to tokenize whitespace into a token after certain

- # doctags. All statetracker.HAS_TYPE that are not listed here must not contain

- # any whitespace in their types.

- DOC_FLAG_LEX_SPACES = re.compile(

- r'(^|(?<=\s))@(?P<name>%s)\b' %

- '|'.join([

- 'const',

- 'enum',

- 'export',

- 'extends',

- 'final',

- 'implements',

- 'package',

- 'param',

- 'private',

- 'protected',

- 'public',

- 'return',

- 'type',

- 'typedef'

- ]))

- DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')

- DOC_TYPE_BLOCK_START = re.compile(r'[<(]')

- DOC_TYPE_BLOCK_END = re.compile(r'[>)]')

- DOC_TYPE_MODIFIERS = re.compile(r'[!?|,:=]')

- # Star followed by non-slash, i.e. a star that does not end a comment.

- # This is used for TYPE_GROUP below.

- SAFE_STAR = r'(\*(?!/))'

- COMMON_DOC_MATCHERS = [

- # Find the end of the comment.

- Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT,

- JavaScriptModes.TEXT_MODE),

- # Tokenize documented flags like @private.

- Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG),

- Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG,

- JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE),

- # Encountering a doc flag should leave lex spaces mode.

- Matcher(DOC_FLAG, Type.DOC_FLAG, JavaScriptModes.DOC_COMMENT_MODE),

- # Tokenize braces so we can find types.

- Matcher(START_BLOCK, Type.DOC_START_BRACE),

- Matcher(END_BLOCK, Type.DOC_END_BRACE),

- # And some more to parse types.

- Matcher(DOC_TYPE_BLOCK_START, Type.DOC_TYPE_START_BLOCK),

- Matcher(DOC_TYPE_BLOCK_END, Type.DOC_TYPE_END_BLOCK),

- Matcher(DOC_TYPE_MODIFIERS, Type.DOC_TYPE_MODIFIER),

- Matcher(DOC_COMMENT_TYPE_TEXT, Type.COMMENT),

- Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]

- # When text is not matched, it is given this default type based on mode.

- # If unspecified in this map, the default default is Type.NORMAL.

- JAVASCRIPT_DEFAULT_TYPES = {

- JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT,

- JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT

- }

- @classmethod

- def BuildMatchers(cls):

- """Builds the token matcher group.

- The token matcher groups work as follows: it is a list of Matcher objects.

- The matchers will be tried in this order, and the first to match will be

- returned. Hence the order is important because the matchers that come first

- overrule the matchers that come later.

- Returns:

- The completed token matcher group.

- """

- # Match a keyword string followed by a non-identifier character in order to

- # not match something like doSomething as do + Something.

- keyword = re.compile('(%s)((?=[^%s])|$)' % (

- '|'.join(cls.KEYWORD_LIST), cls.IDENTIFIER_CHAR))

- return {

- # Matchers for basic text mode.

- JavaScriptModes.TEXT_MODE: [

- # Check a big group - strings, starting comments, and regexes - all

- # of which could be intertwined. 'string with /regex/',

- # /regex with 'string'/, /* comment with /regex/ and string */ (and

- # so on)

- Matcher(cls.START_DOC_COMMENT, Type.START_DOC_COMMENT,

- JavaScriptModes.DOC_COMMENT_MODE),

- Matcher(cls.START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT,

- JavaScriptModes.BLOCK_COMMENT_MODE),

- Matcher(cls.END_OF_LINE_SINGLE_LINE_COMMENT,

- Type.START_SINGLE_LINE_COMMENT),

- Matcher(cls.START_SINGLE_LINE_COMMENT,

- Type.START_SINGLE_LINE_COMMENT,

- JavaScriptModes.LINE_COMMENT_MODE),

- Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START,

- JavaScriptModes.SINGLE_QUOTE_STRING_MODE),

- Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,

- JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),

- Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_START,

- JavaScriptModes.TEMPLATE_STRING_MODE),

- Matcher(cls.REGEX, Type.REGEX),

- # Next we check for start blocks appearing outside any of the items

- # above.

- Matcher(cls.START_BLOCK, Type.START_BLOCK),

- Matcher(cls.END_BLOCK, Type.END_BLOCK),

- # Then we search for function declarations.

- Matcher(cls.FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION,

- JavaScriptModes.FUNCTION_MODE),

- # Next, we convert non-function related parens to tokens.

- Matcher(cls.OPENING_PAREN, Type.START_PAREN),

- Matcher(cls.CLOSING_PAREN, Type.END_PAREN),

- # Next, we convert brackets to tokens.

- Matcher(cls.OPENING_BRACKET, Type.START_BRACKET),

- Matcher(cls.CLOSING_BRACKET, Type.END_BRACKET),

- # Find numbers. This has to happen before operators because

- # scientific notation numbers can have + and - in them.

- Matcher(cls.NUMBER, Type.NUMBER),

- # Find operators and simple assignments

- Matcher(cls.SIMPLE_LVALUE, Type.SIMPLE_LVALUE),

- Matcher(cls.OPERATOR, Type.OPERATOR),

- # Find key words and whitespace.

- Matcher(keyword, Type.KEYWORD),

- Matcher(cls.WHITESPACE, Type.WHITESPACE),

- # Find identifiers.

- Matcher(cls.IDENTIFIER, Type.IDENTIFIER),

- # Finally, we convert semicolons to tokens.

- Matcher(cls.SEMICOLON, Type.SEMICOLON)],

- # Matchers for single quote strings.

- JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [

- Matcher(cls.SINGLE_QUOTE_TEXT, Type.STRING_TEXT),

- Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END,

- JavaScriptModes.TEXT_MODE)],

- # Matchers for double quote strings.

- JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [

- Matcher(cls.DOUBLE_QUOTE_TEXT, Type.STRING_TEXT),

- Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,

- JavaScriptModes.TEXT_MODE)],

- # Matchers for template strings.

- JavaScriptModes.TEMPLATE_STRING_MODE: [

- Matcher(cls.TEMPLATE_QUOTE_TEXT, Type.STRING_TEXT),

- Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_END,

- JavaScriptModes.TEXT_MODE)],

- # Matchers for block comments.

- JavaScriptModes.BLOCK_COMMENT_MODE: [

- # First we check for exiting a block comment.

- Matcher(cls.END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT,

- JavaScriptModes.TEXT_MODE),

- # Match non-comment-ending text.

- Matcher(cls.BLOCK_COMMENT_TEXT, Type.COMMENT)],

- # Matchers for doc comments.

- JavaScriptModes.DOC_COMMENT_MODE: cls.COMMON_DOC_MATCHERS + [

- Matcher(cls.DOC_COMMENT_TEXT, Type.COMMENT)],

- JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: cls.COMMON_DOC_MATCHERS + [

- Matcher(cls.WHITESPACE, Type.COMMENT),

- Matcher(cls.DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)],

- # Matchers for single line comments.

- JavaScriptModes.LINE_COMMENT_MODE: [

- # We greedy match until the end of the line in line comment mode.

- Matcher(cls.ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)],

- # Matchers for code after the function keyword.

- JavaScriptModes.FUNCTION_MODE: [

- # Must match open paren before anything else and move into parameter

- # mode, otherwise everything inside the parameter list is parsed

- # incorrectly.

- Matcher(cls.OPENING_PAREN, Type.START_PARAMETERS,

- JavaScriptModes.PARAMETER_MODE),

- Matcher(cls.WHITESPACE, Type.WHITESPACE),

- Matcher(cls.IDENTIFIER, Type.FUNCTION_NAME)],

- # Matchers for function parameters

- JavaScriptModes.PARAMETER_MODE: [

- # When in function parameter mode, a closing paren is treated

- # specially. Everything else is treated as lines of parameters.

- Matcher(cls.CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS,

- JavaScriptModes.TEXT_MODE),

- Matcher(cls.PARAMETERS, Type.PARAMETERS,

- JavaScriptModes.PARAMETER_MODE)]}

- def __init__(self, parse_js_doc=True):

- """Create a tokenizer object.

- Args:

- parse_js_doc: Whether to do detailed parsing of javascript doc comments,

- or simply treat them as normal comments. Defaults to parsing JsDoc.

- """

- matchers = self.BuildMatchers()

- if not parse_js_doc:

- # Make a copy so the original doesn't get modified.

- matchers = copy.deepcopy(matchers)

- matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[

- JavaScriptModes.BLOCK_COMMENT_MODE]

- tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers,

- self.JAVASCRIPT_DEFAULT_TYPES)

- def _CreateToken(self, string, token_type, line, line_number, values=None):

- """Creates a new JavaScriptToken object.

- Args:

- string: The string of input the token contains.

- token_type: The type of token.

- line: The text of the line this token is in.

- line_number: The line number of the token.

- values: A dict of named values within the token. For instance, a

- function declaration may have a value called 'name' which captures the

- name of the function.

- """

- return javascripttokens.JavaScriptToken(string, token_type, line,

- line_number, values, line_number)