Index: third_party/closure_linter/closure_linter/javascripttokenizer.py |
diff --git a/third_party/closure_linter/closure_linter/javascripttokenizer.py b/third_party/closure_linter/closure_linter/javascripttokenizer.py |
index 98f91849a1b33892ff58aa0ebbe68bd6183c9d87..7c5f8b141cc635f815932dfc3a95399f101c2dbf 100755 |
--- a/third_party/closure_linter/closure_linter/javascripttokenizer.py |
+++ b/third_party/closure_linter/closure_linter/javascripttokenizer.py |
@@ -144,10 +144,6 @@ class JavaScriptTokenizer(tokenizer.Tokenizer): |
KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else', |
'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var', |
'while', 'with'] |
- # Match a keyword string followed by a non-identifier character in order to |
- # not match something like doSomething as do + Something. |
- KEYWORD = re.compile('(%s)((?=[^%s])|$)' % ( |
- '|'.join(KEYWORD_LIST), IDENTIFIER_CHAR)) |
# List of regular expressions to match as operators. Some notes: for our |
# purposes, the comma behaves similarly enough to a normal operator that we |
@@ -159,8 +155,8 @@ class JavaScriptTokenizer(tokenizer.Tokenizer): |
'!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+', |
'--', '\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%', |
'&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':', '\?', |
- r'\bdelete\b', r'\bin\b', r'\binstanceof\b', r'\bnew\b', |
- r'\btypeof\b', r'\bvoid\b'] |
+ r'\^', r'\bdelete\b', r'\bin\b', r'\binstanceof\b', |
+ r'\bnew\b', r'\btypeof\b', r'\bvoid\b'] |
OPERATOR = re.compile('|'.join(OPERATOR_LIST)) |
WHITESPACE = re.compile(r'\s+') |
@@ -210,126 +206,140 @@ class JavaScriptTokenizer(tokenizer.Tokenizer): |
Matcher(END_BLOCK, Type.DOC_END_BRACE), |
Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)] |
- |
- # The token matcher groups work as follows: it is an list of Matcher objects. |
- # The matchers will be tried in this order, and the first to match will be |
- # returned. Hence the order is important because the matchers that come first |
- # overrule the matchers that come later. |
- JAVASCRIPT_MATCHERS = { |
- # Matchers for basic text mode. |
- JavaScriptModes.TEXT_MODE: [ |
- # Check a big group - strings, starting comments, and regexes - all |
- # of which could be intertwined. 'string with /regex/', |
- # /regex with 'string'/, /* comment with /regex/ and string */ (and so |
- # on) |
- Matcher(START_DOC_COMMENT, Type.START_DOC_COMMENT, |
- JavaScriptModes.DOC_COMMENT_MODE), |
- Matcher(START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT, |
- JavaScriptModes.BLOCK_COMMENT_MODE), |
- Matcher(END_OF_LINE_SINGLE_LINE_COMMENT, |
- Type.START_SINGLE_LINE_COMMENT), |
- Matcher(START_SINGLE_LINE_COMMENT, Type.START_SINGLE_LINE_COMMENT, |
- JavaScriptModes.LINE_COMMENT_MODE), |
- Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START, |
- JavaScriptModes.SINGLE_QUOTE_STRING_MODE), |
- Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START, |
- JavaScriptModes.DOUBLE_QUOTE_STRING_MODE), |
- Matcher(REGEX, Type.REGEX), |
- |
- # Next we check for start blocks appearing outside any of the items |
- # above. |
- Matcher(START_BLOCK, Type.START_BLOCK), |
- Matcher(END_BLOCK, Type.END_BLOCK), |
- |
- # Then we search for function declarations. |
- Matcher(FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION, |
- JavaScriptModes.FUNCTION_MODE), |
- |
- # Next, we convert non-function related parens to tokens. |
- Matcher(OPENING_PAREN, Type.START_PAREN), |
- Matcher(CLOSING_PAREN, Type.END_PAREN), |
- |
- # Next, we convert brackets to tokens. |
- Matcher(OPENING_BRACKET, Type.START_BRACKET), |
- Matcher(CLOSING_BRACKET, Type.END_BRACKET), |
- |
- # Find numbers. This has to happen before operators because scientific |
- # notation numbers can have + and - in them. |
- Matcher(NUMBER, Type.NUMBER), |
- |
- # Find operators and simple assignments |
- Matcher(SIMPLE_LVALUE, Type.SIMPLE_LVALUE), |
- Matcher(OPERATOR, Type.OPERATOR), |
- |
- # Find key words and whitespace. |
- Matcher(KEYWORD, Type.KEYWORD), |
- Matcher(WHITESPACE, Type.WHITESPACE), |
- |
- # Find identifiers. |
- Matcher(IDENTIFIER, Type.IDENTIFIER), |
- |
- # Finally, we convert semicolons to tokens. |
- Matcher(SEMICOLON, Type.SEMICOLON)], |
- |
- # Matchers for single quote strings. |
- JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [ |
- Matcher(SINGLE_QUOTE_TEXT, Type.STRING_TEXT), |
- Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END, |
- JavaScriptModes.TEXT_MODE)], |
- |
- # Matchers for double quote strings. |
- JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [ |
- Matcher(DOUBLE_QUOTE_TEXT, Type.STRING_TEXT), |
- Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END, |
- JavaScriptModes.TEXT_MODE)], |
- |
- # Matchers for block comments. |
- JavaScriptModes.BLOCK_COMMENT_MODE: [ |
- # First we check for exiting a block comment. |
- Matcher(END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT, |
- JavaScriptModes.TEXT_MODE), |
- |
- # Match non-comment-ending text.. |
- Matcher(BLOCK_COMMENT_TEXT, Type.COMMENT)], |
- |
- # Matchers for doc comments. |
- JavaScriptModes.DOC_COMMENT_MODE: COMMON_DOC_MATCHERS + [ |
- Matcher(DOC_COMMENT_TEXT, Type.COMMENT)], |
- |
- JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: COMMON_DOC_MATCHERS + [ |
- Matcher(WHITESPACE, Type.COMMENT), |
- Matcher(DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)], |
- |
- # Matchers for single line comments. |
- JavaScriptModes.LINE_COMMENT_MODE: [ |
- # We greedy match until the end of the line in line comment mode. |
- Matcher(ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)], |
- |
- # Matchers for code after the function keyword. |
- JavaScriptModes.FUNCTION_MODE: [ |
- # Must match open paren before anything else and move into parameter |
- # mode, otherwise everything inside the parameter list is parsed |
- # incorrectly. |
- Matcher(OPENING_PAREN, Type.START_PARAMETERS, |
- JavaScriptModes.PARAMETER_MODE), |
- Matcher(WHITESPACE, Type.WHITESPACE), |
- Matcher(IDENTIFIER, Type.FUNCTION_NAME)], |
- |
- # Matchers for function parameters |
- JavaScriptModes.PARAMETER_MODE: [ |
- # When in function parameter mode, a closing paren is treated specially. |
- # Everything else is treated as lines of parameters. |
- Matcher(CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS, |
- JavaScriptModes.TEXT_MODE), |
- Matcher(PARAMETERS, Type.PARAMETERS, JavaScriptModes.PARAMETER_MODE)]} |
- |
# When text is not matched, it is given this default type based on mode. |
# If unspecified in this map, the default default is Type.NORMAL. |
JAVASCRIPT_DEFAULT_TYPES = { |
- JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT, |
- JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT |
+ JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT, |
+ JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT |
} |
+ @classmethod |
+ def BuildMatchers(cls): |
+ """Builds the token matcher group. |
+ |
+ The token matcher groups work as follows: it is a list of Matcher objects. |
+ The matchers will be tried in this order, and the first to match will be |
+ returned. Hence the order is important because the matchers that come first |
+ overrule the matchers that come later. |
+ |
+ Returns: |
+ The completed token matcher group. |
+ """ |
+ # Match a keyword string followed by a non-identifier character in order to |
+ # not match something like doSomething as do + Something. |
+ keyword = re.compile('(%s)((?=[^%s])|$)' % ( |
+ '|'.join(cls.KEYWORD_LIST), cls.IDENTIFIER_CHAR)) |
+ return { |
+ |
+ # Matchers for basic text mode. |
+ JavaScriptModes.TEXT_MODE: [ |
+ # Check a big group - strings, starting comments, and regexes - all |
+ # of which could be intertwined. 'string with /regex/', |
+ # /regex with 'string'/, /* comment with /regex/ and string */ (and |
+ # so on) |
+ Matcher(cls.START_DOC_COMMENT, Type.START_DOC_COMMENT, |
+ JavaScriptModes.DOC_COMMENT_MODE), |
+ Matcher(cls.START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT, |
+ JavaScriptModes.BLOCK_COMMENT_MODE), |
+ Matcher(cls.END_OF_LINE_SINGLE_LINE_COMMENT, |
+ Type.START_SINGLE_LINE_COMMENT), |
+ Matcher(cls.START_SINGLE_LINE_COMMENT, |
+ Type.START_SINGLE_LINE_COMMENT, |
+ JavaScriptModes.LINE_COMMENT_MODE), |
+ Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START, |
+ JavaScriptModes.SINGLE_QUOTE_STRING_MODE), |
+ Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START, |
+ JavaScriptModes.DOUBLE_QUOTE_STRING_MODE), |
+ Matcher(cls.REGEX, Type.REGEX), |
+ |
+ # Next we check for start blocks appearing outside any of the items |
+ # above. |
+ Matcher(cls.START_BLOCK, Type.START_BLOCK), |
+ Matcher(cls.END_BLOCK, Type.END_BLOCK), |
+ |
+ # Then we search for function declarations. |
+ Matcher(cls.FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION, |
+ JavaScriptModes.FUNCTION_MODE), |
+ |
+ # Next, we convert non-function related parens to tokens. |
+ Matcher(cls.OPENING_PAREN, Type.START_PAREN), |
+ Matcher(cls.CLOSING_PAREN, Type.END_PAREN), |
+ |
+ # Next, we convert brackets to tokens. |
+ Matcher(cls.OPENING_BRACKET, Type.START_BRACKET), |
+ Matcher(cls.CLOSING_BRACKET, Type.END_BRACKET), |
+ |
+ # Find numbers. This has to happen before operators because |
+ # scientific notation numbers can have + and - in them. |
+ Matcher(cls.NUMBER, Type.NUMBER), |
+ |
+ # Find operators and simple assignments |
+ Matcher(cls.SIMPLE_LVALUE, Type.SIMPLE_LVALUE), |
+ Matcher(cls.OPERATOR, Type.OPERATOR), |
+ |
+ # Find key words and whitespace. |
+ Matcher(keyword, Type.KEYWORD), |
+ Matcher(cls.WHITESPACE, Type.WHITESPACE), |
+ |
+ # Find identifiers. |
+ Matcher(cls.IDENTIFIER, Type.IDENTIFIER), |
+ |
+ # Finally, we convert semicolons to tokens. |
+ Matcher(cls.SEMICOLON, Type.SEMICOLON)], |
+ |
+ # Matchers for single quote strings. |
+ JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [ |
+ Matcher(cls.SINGLE_QUOTE_TEXT, Type.STRING_TEXT), |
+ Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END, |
+ JavaScriptModes.TEXT_MODE)], |
+ |
+ # Matchers for double quote strings. |
+ JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [ |
+ Matcher(cls.DOUBLE_QUOTE_TEXT, Type.STRING_TEXT), |
+ Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END, |
+ JavaScriptModes.TEXT_MODE)], |
+ |
+ # Matchers for block comments. |
+ JavaScriptModes.BLOCK_COMMENT_MODE: [ |
+ # First we check for exiting a block comment. |
+ Matcher(cls.END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT, |
+ JavaScriptModes.TEXT_MODE), |
+ |
+ # Match non-comment-ending text.. |
+ Matcher(cls.BLOCK_COMMENT_TEXT, Type.COMMENT)], |
+ |
+ # Matchers for doc comments. |
+ JavaScriptModes.DOC_COMMENT_MODE: cls.COMMON_DOC_MATCHERS + [ |
+ Matcher(cls.DOC_COMMENT_TEXT, Type.COMMENT)], |
+ |
+ JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: cls.COMMON_DOC_MATCHERS + [ |
+ Matcher(cls.WHITESPACE, Type.COMMENT), |
+ Matcher(cls.DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)], |
+ |
+ # Matchers for single line comments. |
+ JavaScriptModes.LINE_COMMENT_MODE: [ |
+ # We greedy match until the end of the line in line comment mode. |
+ Matcher(cls.ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)], |
+ |
+ # Matchers for code after the function keyword. |
+ JavaScriptModes.FUNCTION_MODE: [ |
+ # Must match open paren before anything else and move into parameter |
+ # mode, otherwise everything inside the parameter list is parsed |
+ # incorrectly. |
+ Matcher(cls.OPENING_PAREN, Type.START_PARAMETERS, |
+ JavaScriptModes.PARAMETER_MODE), |
+ Matcher(cls.WHITESPACE, Type.WHITESPACE), |
+ Matcher(cls.IDENTIFIER, Type.FUNCTION_NAME)], |
+ |
+ # Matchers for function parameters |
+ JavaScriptModes.PARAMETER_MODE: [ |
+ # When in function parameter mode, a closing paren is treated |
+ # specially. Everything else is treated as lines of parameters. |
+ Matcher(cls.CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS, |
+ JavaScriptModes.TEXT_MODE), |
+ Matcher(cls.PARAMETERS, Type.PARAMETERS, |
+ JavaScriptModes.PARAMETER_MODE)]} |
+ |
def __init__(self, parse_js_doc = True): |
"""Create a tokenizer object. |
@@ -337,7 +347,7 @@ class JavaScriptTokenizer(tokenizer.Tokenizer): |
parse_js_doc: Whether to do detailed parsing of javascript doc comments, |
or simply treat them as normal comments. Defaults to parsing JsDoc. |
""" |
- matchers = self.JAVASCRIPT_MATCHERS |
+ matchers = self.BuildMatchers() |
if not parse_js_doc: |
# Make a copy so the original doesn't get modified. |
matchers = copy.deepcopy(matchers) |
@@ -360,4 +370,4 @@ class JavaScriptTokenizer(tokenizer.Tokenizer): |
name of the function. |
""" |
return javascripttokens.JavaScriptToken(string, token_type, line, |
- line_number, values) |
+ line_number, values, line_number) |