Index: third_party/closure_linter/closure_linter/javascripttokenizer.py
diff --git a/third_party/closure_linter/closure_linter/javascripttokenizer.py b/third_party/closure_linter/closure_linter/javascripttokenizer.py
index 7c5f8b141cc635f815932dfc3a95399f101c2dbf..964db7ce2fdcb4b17af1a8c565c1a08f7fb94fa6 100755
--- a/third_party/closure_linter/closure_linter/javascripttokenizer.py
+++ b/third_party/closure_linter/closure_linter/javascripttokenizer.py
@@ -36,6 +36,7 @@ class JavaScriptModes(object):
   TEXT_MODE = 'text'
   SINGLE_QUOTE_STRING_MODE = 'single_quote_string'
   DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'
+  TEMPLATE_STRING_MODE = 'template_string'
   BLOCK_COMMENT_MODE = 'block_comment'
   DOC_COMMENT_MODE = 'doc_comment'
   DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'
@@ -51,7 +52,7 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
   """

   # Useful patterns for JavaScript parsing.
-  IDENTIFIER_CHAR = r'A-Za-z0-9_$.'
+  IDENTIFIER_CHAR = r'A-Za-z0-9_$'

   # Number patterns based on:
   # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html
@@ -75,6 +76,10 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
   SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")
   DOUBLE_QUOTE = re.compile(r'"')
   DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')
+  # Template strings are different from normal strings in that they do not
+  # require escaping of end of lines in order to be multi-line.
+  TEMPLATE_QUOTE = re.compile(r'`')
+  TEMPLATE_QUOTE_TEXT = re.compile(r'([^`]|$)+')

   START_SINGLE_LINE_COMMENT = re.compile(r'//')
   END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')
@@ -92,6 +97,9 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
   # like in email addresses in the @author tag.
   DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')
   DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')
+  # Match anything that is allowed in a type definition, except for tokens
+  # needed to parse it (and the lookahead assertion for "*/").
+  DOC_COMMENT_TYPE_TEXT = re.compile(r'([^*|!?=<>(){}:,\s]|\*(?!/))+')

   # Match the prefix ' * ' that starts every line of jsdoc. Want to include
   # spaces after the '*', but nothing else that occurs after a '*', and don't
@@ -141,9 +149,25 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
   # delete, in, instanceof, new, typeof - included as operators.
   # this - included in identifiers.
   # null, undefined - not included, should go in some "special constant" list.
-  KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else',
-      'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var',
-      'while', 'with']
+  KEYWORD_LIST = [
+      'break',
+      'case',
+      'catch',
+      'continue',
+      'default',
+      'do',
+      'else',
+      'finally',
+      'for',
+      'if',
+      'return',
+      'switch',
+      'throw',
+      'try',
+      'var',
+      'while',
+      'with',
+  ]

   # List of regular expressions to match as operators. Some notes: for our
   # purposes, the comma behaves similarly enough to a normal operator that we
@@ -151,19 +175,62 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
   # characters - this may not match some very esoteric uses of the in operator.
   # Operators that are subsets of larger operators must come later in this list
   # for proper matching, e.g., '>>' must come AFTER '>>>'.
-  OPERATOR_LIST = [',', r'\+\+', '===', '!==', '>>>=', '>>>', '==', '>=', '<=',
-                   '!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+',
-                   '--', '\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%',
-                   '&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':', '\?',
-                   r'\^', r'\bdelete\b', r'\bin\b', r'\binstanceof\b',
-                   r'\bnew\b', r'\btypeof\b', r'\bvoid\b']
+  OPERATOR_LIST = [
+      ',',
+      r'\+\+',
+      '===',
+      '!==',
+      '>>>=',
+      '>>>',
+      '==',
+      '>=',
+      '<=',
+      '!=',
+      '<<=',
+      '>>=',
+      '<<',
+      '>>',
+      '=>',
+      '>',
+      '<',
+      r'\+=',
+      r'\+',
+      '--',
+      r'\^=',
+      '-=',
+      '-',
+      '/=',
+      '/',
+      r'\*=',
+      r'\*',
+      '%=',
+      '%',
+      '&&',
+      r'\|\|',
+      '&=',
+      '&',
+      r'\|=',
+      r'\|',
+      '=',
+      '!',
+      ':',
+      r'\?',
+      r'\^',
+      r'\bdelete\b',
+      r'\bin\b',
+      r'\binstanceof\b',
+      r'\bnew\b',
+      r'\btypeof\b',
+      r'\bvoid\b',
+      r'\.',
+  ]
   OPERATOR = re.compile('|'.join(OPERATOR_LIST))

   WHITESPACE = re.compile(r'\s+')
   SEMICOLON = re.compile(r';')
   # Technically JavaScript identifiers can't contain '.', but we treat a set of
-  # nested identifiers as a single identifier.
-  NESTED_IDENTIFIER = r'[a-zA-Z_$][%s.]*' % IDENTIFIER_CHAR
+  # nested identifiers as a single identifier, except for trailing dots.
+  NESTED_IDENTIFIER = r'[a-zA-Z_$]([%s]|\.[a-zA-Z_$])*' % IDENTIFIER_CHAR
   IDENTIFIER = re.compile(NESTED_IDENTIFIER)

   SIMPLE_LVALUE = re.compile(r"""
@@ -177,13 +244,35 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
   # beginning of the line, after whitespace, or after a '{'. The look-behind
   # check is necessary to not match someone@google.com as a flag.
   DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
-  # To properly parse parameter names, we need to tokenize whitespace into a
-  # token.
-  DOC_FLAG_LEX_SPACES = re.compile(r'(^|(?<=\s))@(?P<name>%s)\b' %
-                                   '|'.join(['param']))
+  # To properly parse parameter names and complex doctypes containing
+  # whitespace, we need to tokenize whitespace into a token after certain
+  # doctags. All statetracker.HAS_TYPE that are not listed here must not contain
+  # any whitespace in their types.
+  DOC_FLAG_LEX_SPACES = re.compile(
+      r'(^|(?<=\s))@(?P<name>%s)\b' %
+      '|'.join([
+          'const',
+          'enum',
+          'export',
+          'extends',
+          'final',
+          'implements',
+          'package',
+          'param',
+          'private',
+          'protected',
+          'public',
+          'return',
+          'type',
+          'typedef'
+      ]))

   DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')

+  DOC_TYPE_BLOCK_START = re.compile(r'[<(]')
+  DOC_TYPE_BLOCK_END = re.compile(r'[>)]')
+  DOC_TYPE_MODIFIERS = re.compile(r'[!?|,:=]')
+
   # Star followed by non-slash, i.e a star that does not end a comment.
   # This is used for TYPE_GROUP below.
   SAFE_STAR = r'(\*(?!/))'
@@ -204,6 +293,14 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
       # Tokenize braces so we can find types.
       Matcher(START_BLOCK, Type.DOC_START_BRACE),
       Matcher(END_BLOCK, Type.DOC_END_BRACE),
+
+      # And some more to parse types.
+      Matcher(DOC_TYPE_BLOCK_START, Type.DOC_TYPE_START_BLOCK),
+      Matcher(DOC_TYPE_BLOCK_END, Type.DOC_TYPE_END_BLOCK),
+
+      Matcher(DOC_TYPE_MODIFIERS, Type.DOC_TYPE_MODIFIER),
+      Matcher(DOC_COMMENT_TYPE_TEXT, Type.COMMENT),
+
       Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]

   # When text is not matched, it is given this default type based on mode.
@@ -250,6 +347,8 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
                     JavaScriptModes.SINGLE_QUOTE_STRING_MODE),
             Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,
                     JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),
+            Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_START,
+                    JavaScriptModes.TEMPLATE_STRING_MODE),
             Matcher(cls.REGEX, Type.REGEX),

             # Next we check for start blocks appearing outside any of the items
@@ -299,6 +398,12 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
             Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,
                     JavaScriptModes.TEXT_MODE)],

+        # Matchers for template strings.
+        JavaScriptModes.TEMPLATE_STRING_MODE: [
+            Matcher(cls.TEMPLATE_QUOTE_TEXT, Type.STRING_TEXT),
+            Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_END,
+                    JavaScriptModes.TEXT_MODE)],
+
         # Matchers for block comments.
         JavaScriptModes.BLOCK_COMMENT_MODE: [
             # First we check for exiting a block comment.
@@ -340,7 +445,7 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
             Matcher(cls.PARAMETERS, Type.PARAMETERS,
                     JavaScriptModes.PARAMETER_MODE)]}

-  def __init__(self, parse_js_doc = True):
+  def __init__(self, parse_js_doc=True):
     """Create a tokenizer object.

     Args:
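
For reviewers unfamiliar with these patterns, here is a small standalone sketch (not part of the patch; the sample inputs are invented for illustration) showing how the newly added regular expressions behave when exercised directly with Python's re module:

import re

# Patterns copied verbatim from the patch above.
TEMPLATE_QUOTE_TEXT = re.compile(r'([^`]|$)+')
IDENTIFIER_CHAR = r'A-Za-z0-9_$'
NESTED_IDENTIFIER = r'[a-zA-Z_$]([%s]|\.[a-zA-Z_$])*' % IDENTIFIER_CHAR
IDENTIFIER = re.compile(NESTED_IDENTIFIER)

# Template string text may span lines without escaping the newline; the match
# stops at the closing backtick.
print(repr(TEMPLATE_QUOTE_TEXT.match('line one\nline two`').group()))
# 'line one\nline two'

# The revised NESTED_IDENTIFIER no longer swallows a trailing dot, so the final
# '.' is left for the operator matcher (the new r'\.' entry in OPERATOR_LIST).
print(IDENTIFIER.match('goog.dom.').group())
# goog.dom

# Ordering in OPERATOR_LIST still matters: longer operators precede their
# prefixes, so '>>>' is matched as one token rather than '>>' followed by '>'.
# The list below is abridged for the example.
OPERATOR = re.compile('|'.join(['>>>=', '>>>', '>>', '=>', '>']))
print(OPERATOR.match('>>>').group())
# >>>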
|