Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(795)

Unified Diff: third_party/closure_linter/closure_linter/javascripttokenizer.py

Issue 2328693002: Updated linter with upstream release (2.3.19) (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/closure_linter/closure_linter/javascripttokenizer.py
diff --git a/third_party/closure_linter/closure_linter/javascripttokenizer.py b/third_party/closure_linter/closure_linter/javascripttokenizer.py
index 7c5f8b141cc635f815932dfc3a95399f101c2dbf..964db7ce2fdcb4b17af1a8c565c1a08f7fb94fa6 100755
--- a/third_party/closure_linter/closure_linter/javascripttokenizer.py
+++ b/third_party/closure_linter/closure_linter/javascripttokenizer.py
@@ -36,6 +36,7 @@ class JavaScriptModes(object):
TEXT_MODE = 'text'
SINGLE_QUOTE_STRING_MODE = 'single_quote_string'
DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'
+ TEMPLATE_STRING_MODE = 'template_string'
BLOCK_COMMENT_MODE = 'block_comment'
DOC_COMMENT_MODE = 'doc_comment'
DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'
@@ -51,7 +52,7 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
"""
# Useful patterns for JavaScript parsing.
- IDENTIFIER_CHAR = r'A-Za-z0-9_$.'
+ IDENTIFIER_CHAR = r'A-Za-z0-9_$'
# Number patterns based on:
# http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html
@@ -75,6 +76,10 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")
DOUBLE_QUOTE = re.compile(r'"')
DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')
+ # Template strings are different from normal strings in that they do not
+ # require escaping of end of lines in order to be multi-line.
+ TEMPLATE_QUOTE = re.compile(r'`')
+ TEMPLATE_QUOTE_TEXT = re.compile(r'([^`]|$)+')
START_SINGLE_LINE_COMMENT = re.compile(r'//')
END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')
@@ -92,6 +97,9 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
# like in email addresses in the @author tag.
DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')
DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')
+ # Match anything that is allowed in a type definition, except for tokens
+ # needed to parse it (and the lookahead assertion for "*/").
+ DOC_COMMENT_TYPE_TEXT = re.compile(r'([^*|!?=<>(){}:,\s]|\*(?!/))+')
# Match the prefix ' * ' that starts every line of jsdoc. Want to include
# spaces after the '*', but nothing else that occurs after a '*', and don't
@@ -141,9 +149,25 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
# delete, in, instanceof, new, typeof - included as operators.
# this - included in identifiers.
# null, undefined - not included, should go in some "special constant" list.
- KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else',
- 'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var',
- 'while', 'with']
+ KEYWORD_LIST = [
+ 'break',
+ 'case',
+ 'catch',
+ 'continue',
+ 'default',
+ 'do',
+ 'else',
+ 'finally',
+ 'for',
+ 'if',
+ 'return',
+ 'switch',
+ 'throw',
+ 'try',
+ 'var',
+ 'while',
+ 'with',
+ ]
# List of regular expressions to match as operators. Some notes: for our
# purposes, the comma behaves similarly enough to a normal operator that we
@@ -151,19 +175,62 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
# characters - this may not match some very esoteric uses of the in operator.
# Operators that are subsets of larger operators must come later in this list
# for proper matching, e.g., '>>' must come AFTER '>>>'.
- OPERATOR_LIST = [',', r'\+\+', '===', '!==', '>>>=', '>>>', '==', '>=', '<=',
- '!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+',
- '--', '\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%',
- '&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':', '\?',
- r'\^', r'\bdelete\b', r'\bin\b', r'\binstanceof\b',
- r'\bnew\b', r'\btypeof\b', r'\bvoid\b']
+ OPERATOR_LIST = [
+ ',',
+ r'\+\+',
+ '===',
+ '!==',
+ '>>>=',
+ '>>>',
+ '==',
+ '>=',
+ '<=',
+ '!=',
+ '<<=',
+ '>>=',
+ '<<',
+ '>>',
+ '=>',
+ '>',
+ '<',
+ r'\+=',
+ r'\+',
+ '--',
+ r'\^=',
+ '-=',
+ '-',
+ '/=',
+ '/',
+ r'\*=',
+ r'\*',
+ '%=',
+ '%',
+ '&&',
+ r'\|\|',
+ '&=',
+ '&',
+ r'\|=',
+ r'\|',
+ '=',
+ '!',
+ ':',
+ r'\?',
+ r'\^',
+ r'\bdelete\b',
+ r'\bin\b',
+ r'\binstanceof\b',
+ r'\bnew\b',
+ r'\btypeof\b',
+ r'\bvoid\b',
+ r'\.',
+ ]
OPERATOR = re.compile('|'.join(OPERATOR_LIST))
WHITESPACE = re.compile(r'\s+')
SEMICOLON = re.compile(r';')
# Technically JavaScript identifiers can't contain '.', but we treat a set of
- # nested identifiers as a single identifier.
- NESTED_IDENTIFIER = r'[a-zA-Z_$][%s.]*' % IDENTIFIER_CHAR
+ # nested identifiers as a single identifier, except for trailing dots.
+ NESTED_IDENTIFIER = r'[a-zA-Z_$]([%s]|\.[a-zA-Z_$])*' % IDENTIFIER_CHAR
IDENTIFIER = re.compile(NESTED_IDENTIFIER)
SIMPLE_LVALUE = re.compile(r"""
@@ -177,13 +244,35 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
# beginning of the line, after whitespace, or after a '{'. The look-behind
# check is necessary to not match someone@google.com as a flag.
DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
- # To properly parse parameter names, we need to tokenize whitespace into a
- # token.
- DOC_FLAG_LEX_SPACES = re.compile(r'(^|(?<=\s))@(?P<name>%s)\b' %
- '|'.join(['param']))
+ # To properly parse parameter names and complex doctypes containing
+ # whitespace, we need to tokenize whitespace into a token after certain
+ # doctags. All statetracker.HAS_TYPE that are not listed here must not contain
+ # any whitespace in their types.
+ DOC_FLAG_LEX_SPACES = re.compile(
+ r'(^|(?<=\s))@(?P<name>%s)\b' %
+ '|'.join([
+ 'const',
+ 'enum',
+ 'export',
+ 'extends',
+ 'final',
+ 'implements',
+ 'package',
+ 'param',
+ 'private',
+ 'protected',
+ 'public',
+ 'return',
+ 'type',
+ 'typedef'
+ ]))
DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')
+ DOC_TYPE_BLOCK_START = re.compile(r'[<(]')
+ DOC_TYPE_BLOCK_END = re.compile(r'[>)]')
+ DOC_TYPE_MODIFIERS = re.compile(r'[!?|,:=]')
+
# Star followed by non-slash, i.e a star that does not end a comment.
# This is used for TYPE_GROUP below.
SAFE_STAR = r'(\*(?!/))'
@@ -204,6 +293,14 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
# Tokenize braces so we can find types.
Matcher(START_BLOCK, Type.DOC_START_BRACE),
Matcher(END_BLOCK, Type.DOC_END_BRACE),
+
+ # And some more to parse types.
+ Matcher(DOC_TYPE_BLOCK_START, Type.DOC_TYPE_START_BLOCK),
+ Matcher(DOC_TYPE_BLOCK_END, Type.DOC_TYPE_END_BLOCK),
+
+ Matcher(DOC_TYPE_MODIFIERS, Type.DOC_TYPE_MODIFIER),
+ Matcher(DOC_COMMENT_TYPE_TEXT, Type.COMMENT),
+
Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]
# When text is not matched, it is given this default type based on mode.
@@ -250,6 +347,8 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
JavaScriptModes.SINGLE_QUOTE_STRING_MODE),
Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,
JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),
+ Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_START,
+ JavaScriptModes.TEMPLATE_STRING_MODE),
Matcher(cls.REGEX, Type.REGEX),
# Next we check for start blocks appearing outside any of the items
@@ -299,6 +398,12 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,
JavaScriptModes.TEXT_MODE)],
+ # Matchers for template strings.
+ JavaScriptModes.TEMPLATE_STRING_MODE: [
+ Matcher(cls.TEMPLATE_QUOTE_TEXT, Type.STRING_TEXT),
+ Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_END,
+ JavaScriptModes.TEXT_MODE)],
+
# Matchers for block comments.
JavaScriptModes.BLOCK_COMMENT_MODE: [
# First we check for exiting a block comment.
@@ -340,7 +445,7 @@ class JavaScriptTokenizer(tokenizer.Tokenizer):
Matcher(cls.PARAMETERS, Type.PARAMETERS,
JavaScriptModes.PARAMETER_MODE)]}
- def __init__(self, parse_js_doc = True):
+ def __init__(self, parse_js_doc=True):
"""Create a tokenizer object.
Args:

Powered by Google App Engine
This is Rietveld 408576698