| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # | |
| 3 # Copyright 2007 The Closure Linter Authors. All Rights Reserved. | |
| 4 # | |
| 5 # Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 # you may not use this file except in compliance with the License. | |
| 7 # You may obtain a copy of the License at | |
| 8 # | |
| 9 # http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 # | |
| 11 # Unless required by applicable law or agreed to in writing, software | |
| 12 # distributed under the License is distributed on an "AS-IS" BASIS, | |
| 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 # See the License for the specific language governing permissions and | |
| 15 # limitations under the License. | |
| 16 | |
| 17 """Regular expression based JavaScript parsing classes.""" | |
| 18 | |
# Module authors, kept as a tuple of 'email (Full Name)' strings.
__author__ = ('robbyw@google.com (Robert Walker)',
              'ajp@google.com (Andy Perelson)')
| 21 | |
| 22 import copy | |
| 23 import re | |
| 24 | |
| 25 from closure_linter import javascripttokens | |
| 26 from closure_linter.common import matcher | |
| 27 from closure_linter.common import tokenizer | |
| 28 | |
# Shorthand aliases so the matcher tables below stay readable: Type is the
# JavaScript token type enumeration and Matcher is the generic matcher class.
Type = javascripttokens.JavaScriptTokenType
Matcher = matcher.Matcher
| 32 | |
| 33 | |
class JavaScriptModes(object):
  """Enumeration of the different matcher modes used for JavaScript.

  Each attribute is a plain string naming one tokenizer mode. The values are
  used as keys of the matcher table, so they must stay unique.
  """
  # Ordinary code outside of strings and comments.
  TEXT_MODE = 'text'

  # Inside string literals, one mode per quoting style.
  SINGLE_QUOTE_STRING_MODE = 'single_quote_string'
  DOUBLE_QUOTE_STRING_MODE = 'double_quote_string'
  TEMPLATE_STRING_MODE = 'template_string'

  # Inside comments.
  BLOCK_COMMENT_MODE = 'block_comment'
  DOC_COMMENT_MODE = 'doc_comment'
  DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces'
  LINE_COMMENT_MODE = 'line_comment'

  # After the 'function' keyword and inside its parameter list.
  PARAMETER_MODE = 'parameter'
  FUNCTION_MODE = 'function'
| 46 | |
| 47 | |
class JavaScriptTokenizer(tokenizer.Tokenizer):
  """JavaScript tokenizer.

  Converts JavaScript code into an array of tokens.

  The class attributes below are the regular expressions and literal lists
  from which BuildMatchers() assembles the per-mode matcher table.
  """

  # Useful patterns for JavaScript parsing.
  IDENTIFIER_CHAR = r'A-Za-z0-9_$'

  # Number patterns based on:
  # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html
  # MANTISSA is only ever used inside NUMBER, which is compiled with
  # re.VERBOSE, so its embedded whitespace and comments are ignored.
  MANTISSA = r"""
             (\d+(?!\.)) |                # Matches '10'
             (\d+\.(?!\d)) |              # Matches '10.'
             (\d*\.\d+)                   # Matches '.5' or '10.5'
             """
  DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA
  HEX_LITERAL = r'0[xX][0-9a-fA-F]+'
  NUMBER = re.compile(r"""
                      ((%s)|(%s))
                      """ % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE)

  # Strings come in three parts - first we match the start of the string, then
  # the contents, then the end.  The contents consist of any character except a
  # backslash or end of string, or a backslash followed by any character, or a
  # backslash followed by end of line to support correct parsing of multi-line
  # strings.
  SINGLE_QUOTE = re.compile(r"'")
  SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+")
  DOUBLE_QUOTE = re.compile(r'"')
  DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+')
  # Template strings are different from normal strings in that they do not
  # require escaping of end of lines in order to be multi-line.
  TEMPLATE_QUOTE = re.compile(r'`')
  TEMPLATE_QUOTE_TEXT = re.compile(r'([^`]|$)+')

  START_SINGLE_LINE_COMMENT = re.compile(r'//')
  END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$')

  START_DOC_COMMENT = re.compile(r'/\*\*')
  START_BLOCK_COMMENT = re.compile(r'/\*')
  END_BLOCK_COMMENT = re.compile(r'\*/')
  BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+')

  # Comment text is anything that we are not going to parse into another special
  # token like (inline) flags or end comments. Complicated regex to match
  # most normal characters, and '*', '{', '}', and '@' when we are sure that
  # it is safe. Expression [^*{\s]@ must come first, or the other options will
  # match everything before @, and we won't match @'s that aren't part of flags
  # like in email addresses in the @author tag.
  DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+')
  DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+')
  # Match anything that is allowed in a type definition, except for tokens
  # needed to parse it (and the lookahead assertion for "*/").
  DOC_COMMENT_TYPE_TEXT = re.compile(r'([^*|!?=<>(){}:,\s]|\*(?!/))+')

  # Match the prefix ' * ' that starts every line of jsdoc. Want to include
  # spaces after the '*', but nothing else that occurs after a '*', and don't
  # want to match the '*' in '*/'.
  DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))')

  START_BLOCK = re.compile('{')
  END_BLOCK = re.compile('}')

  # Only used inside REGEX, which is compiled with re.VERBOSE.
  REGEX_CHARACTER_CLASS = r"""
                          \[               # Opening bracket
                          ([^\]\\]|\\.)*   # Anything but a ] or \,
                                           # or a backslash followed by anything
                          \]               # Closing bracket
                          """
  # We ensure the regex is followed by one of the tokens listed below to avoid
  # incorrectly parsing something like x / y / z as x REGEX(/ y /) z
  POST_REGEX_LIST = [
      ';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}']

  REGEX = re.compile(r"""
                     /                      # opening slash
                     (?!\*)                 # not the start of a comment
                     (\\.|[^\[\/\\]|(%s))*  # a backslash followed by anything,
                                            # or anything but a / or [ or \,
                                            # or a character class
                     /                      # closing slash
                     [gimsx]*               # optional modifiers
                     (?=\s*(%s))
                     """ % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)),
                     re.VERBOSE)

  ANYTHING = re.compile(r'.*')
  PARAMETERS = re.compile(r'[^\)]+')
  CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*')

  FUNCTION_DECLARATION = re.compile(r'\bfunction\b')

  OPENING_PAREN = re.compile(r'\(')
  CLOSING_PAREN = re.compile(r'\)')

  OPENING_BRACKET = re.compile(r'\[')
  CLOSING_BRACKET = re.compile(r'\]')

  # We omit these JS keywords from the list:
  #   function - covered by FUNCTION_DECLARATION.
  #   delete, in, instanceof, new, typeof - included as operators.
  #   this - included in identifiers.
  #   null, undefined - not included, should go in some "special constant" list.
  KEYWORD_LIST = [
      'break',
      'case',
      'catch',
      'continue',
      'default',
      'do',
      'else',
      'finally',
      'for',
      'if',
      'return',
      'switch',
      'throw',
      'try',
      'var',
      'while',
      'with',
  ]

  # List of regular expressions to match as operators.  Some notes: for our
  # purposes, the comma behaves similarly enough to a normal operator that we
  # include it here.  r'\bin\b' actually matches 'in' surrounded by boundary
  # characters - this may not match some very esoteric uses of the in operator.
  # Operators that are subsets of larger operators must come later in this list
  # for proper matching, e.g., '>>' must come AFTER '>>>'.
  OPERATOR_LIST = [
      ',',
      r'\+\+',
      '===',
      '!==',
      '>>>=',
      '>>>',
      '==',
      '>=',
      '<=',
      '!=',
      '<<=',
      '>>=',
      '<<',
      '>>',
      '=>',
      '>',
      '<',
      r'\+=',
      r'\+',
      '--',
      r'\^=',
      '-=',
      '-',
      '/=',
      '/',
      r'\*=',
      r'\*',
      '%=',
      '%',
      '&&',
      r'\|\|',
      '&=',
      '&',
      r'\|=',
      r'\|',
      '=',
      '!',
      ':',
      r'\?',
      r'\^',
      r'\bdelete\b',
      r'\bin\b',
      r'\binstanceof\b',
      r'\bnew\b',
      r'\btypeof\b',
      r'\bvoid\b',
      r'\.',
  ]
  OPERATOR = re.compile('|'.join(OPERATOR_LIST))

  WHITESPACE = re.compile(r'\s+')
  SEMICOLON = re.compile(r';')
  # Technically JavaScript identifiers can't contain '.', but we treat a set of
  # nested identifiers as a single identifier, except for trailing dots.
  NESTED_IDENTIFIER = r'[a-zA-Z_$]([%s]|\.[a-zA-Z_$])*' % IDENTIFIER_CHAR
  IDENTIFIER = re.compile(NESTED_IDENTIFIER)

  SIMPLE_LVALUE = re.compile(r"""
                             (?P<identifier>%s)      # a valid identifier
                             (?=\s*                  # optional whitespace
                             \=                      # look ahead to equal sign
                             (?!=))                  # not follwed by equal
                             """ % NESTED_IDENTIFIER, re.VERBOSE)

  # A doc flag is a @ sign followed by non-space characters that appears at the
  # beginning of the line, after whitespace, or after a '{'.  The look-behind
  # check is necessary to not match someone@google.com as a flag.
  DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)')
  # To properly parse parameter names and complex doctypes containing
  # whitespace, we need to tokenize whitespace into a token after certain
  # doctags. All statetracker.HAS_TYPE that are not listed here must not contain
  # any whitespace in their types.
  DOC_FLAG_LEX_SPACES = re.compile(
      r'(^|(?<=\s))@(?P<name>%s)\b' %
      '|'.join([
          'const',
          'enum',
          'export',
          'extends',
          'final',
          'implements',
          'package',
          'param',
          'private',
          'protected',
          'public',
          'return',
          'type',
          'typedef'
      ]))

  DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)')

  DOC_TYPE_BLOCK_START = re.compile(r'[<(]')
  DOC_TYPE_BLOCK_END = re.compile(r'[>)]')
  DOC_TYPE_MODIFIERS = re.compile(r'[!?|,:=]')

  # Star followed by non-slash, i.e a star that does not end a comment.
  # This is used for TYPE_GROUP below.
  SAFE_STAR = r'(\*(?!/))'

  # Matchers shared by both doc comment modes; BuildMatchers() appends the
  # mode-specific text matchers after these.
  COMMON_DOC_MATCHERS = [
      # Find the end of the comment.
      Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT,
              JavaScriptModes.TEXT_MODE),

      # Tokenize documented flags like @private.
      Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG),
      Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG,
              JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE),

      # Encountering a doc flag should leave lex spaces mode.
      Matcher(DOC_FLAG, Type.DOC_FLAG, JavaScriptModes.DOC_COMMENT_MODE),

      # Tokenize braces so we can find types.
      Matcher(START_BLOCK, Type.DOC_START_BRACE),
      Matcher(END_BLOCK, Type.DOC_END_BRACE),

      # And some more to parse types.
      Matcher(DOC_TYPE_BLOCK_START, Type.DOC_TYPE_START_BLOCK),
      Matcher(DOC_TYPE_BLOCK_END, Type.DOC_TYPE_END_BLOCK),

      Matcher(DOC_TYPE_MODIFIERS, Type.DOC_TYPE_MODIFIER),
      Matcher(DOC_COMMENT_TYPE_TEXT, Type.COMMENT),

      Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)]

  # When text is not matched, it is given this default type based on mode.
  # If unspecified in this map, the default default is Type.NORMAL.
  JAVASCRIPT_DEFAULT_TYPES = {
      JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT,
      JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT
  }

  @classmethod
  def BuildMatchers(cls):
    """Builds the token matcher group.

    The token matcher groups work as follows: it is a list of Matcher objects.
    The matchers will be tried in this order, and the first to match will be
    returned.  Hence the order is important because the matchers that come first
    overrule the matchers that come later.

    A new dict (with new lists) is built on every call, so callers may replace
    entries in the result without affecting other callers.

    Returns:
      The completed token matcher group: a dict mapping each JavaScriptModes
      value to its ordered list of Matcher objects.
    """
    # Match a keyword string followed by a non-identifier character in order to
    # not match something like doSomething as do + Something.
    keyword = re.compile('(%s)((?=[^%s])|$)' % (
        '|'.join(cls.KEYWORD_LIST), cls.IDENTIFIER_CHAR))
    return {

        # Matchers for basic text mode.
        JavaScriptModes.TEXT_MODE: [
            # Check a big group - strings, starting comments, and regexes - all
            # of which could be intertwined.  'string with /regex/',
            # /regex with 'string'/, /* comment with /regex/ and string */ (and
            # so on)
            Matcher(cls.START_DOC_COMMENT, Type.START_DOC_COMMENT,
                    JavaScriptModes.DOC_COMMENT_MODE),
            Matcher(cls.START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT,
                    JavaScriptModes.BLOCK_COMMENT_MODE),
            Matcher(cls.END_OF_LINE_SINGLE_LINE_COMMENT,
                    Type.START_SINGLE_LINE_COMMENT),
            Matcher(cls.START_SINGLE_LINE_COMMENT,
                    Type.START_SINGLE_LINE_COMMENT,
                    JavaScriptModes.LINE_COMMENT_MODE),
            Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START,
                    JavaScriptModes.SINGLE_QUOTE_STRING_MODE),
            Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START,
                    JavaScriptModes.DOUBLE_QUOTE_STRING_MODE),
            Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_START,
                    JavaScriptModes.TEMPLATE_STRING_MODE),
            Matcher(cls.REGEX, Type.REGEX),

            # Next we check for start blocks appearing outside any of the items
            # above.
            Matcher(cls.START_BLOCK, Type.START_BLOCK),
            Matcher(cls.END_BLOCK, Type.END_BLOCK),

            # Then we search for function declarations.
            Matcher(cls.FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION,
                    JavaScriptModes.FUNCTION_MODE),

            # Next, we convert non-function related parens to tokens.
            Matcher(cls.OPENING_PAREN, Type.START_PAREN),
            Matcher(cls.CLOSING_PAREN, Type.END_PAREN),

            # Next, we convert brackets to tokens.
            Matcher(cls.OPENING_BRACKET, Type.START_BRACKET),
            Matcher(cls.CLOSING_BRACKET, Type.END_BRACKET),

            # Find numbers.  This has to happen before operators because
            # scientific notation numbers can have + and - in them.
            Matcher(cls.NUMBER, Type.NUMBER),

            # Find operators and simple assignments
            Matcher(cls.SIMPLE_LVALUE, Type.SIMPLE_LVALUE),
            Matcher(cls.OPERATOR, Type.OPERATOR),

            # Find key words and whitespace.
            Matcher(keyword, Type.KEYWORD),
            Matcher(cls.WHITESPACE, Type.WHITESPACE),

            # Find identifiers.
            Matcher(cls.IDENTIFIER, Type.IDENTIFIER),

            # Finally, we convert semicolons to tokens.
            Matcher(cls.SEMICOLON, Type.SEMICOLON)],

        # Matchers for single quote strings.
        JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [
            Matcher(cls.SINGLE_QUOTE_TEXT, Type.STRING_TEXT),
            Matcher(cls.SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END,
                    JavaScriptModes.TEXT_MODE)],

        # Matchers for double quote strings.
        JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [
            Matcher(cls.DOUBLE_QUOTE_TEXT, Type.STRING_TEXT),
            Matcher(cls.DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END,
                    JavaScriptModes.TEXT_MODE)],

        # Matchers for template strings.
        JavaScriptModes.TEMPLATE_STRING_MODE: [
            Matcher(cls.TEMPLATE_QUOTE_TEXT, Type.STRING_TEXT),
            Matcher(cls.TEMPLATE_QUOTE, Type.TEMPLATE_STRING_END,
                    JavaScriptModes.TEXT_MODE)],

        # Matchers for block comments.
        JavaScriptModes.BLOCK_COMMENT_MODE: [
            # First we check for exiting a block comment.
            Matcher(cls.END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT,
                    JavaScriptModes.TEXT_MODE),

            # Match non-comment-ending text..
            Matcher(cls.BLOCK_COMMENT_TEXT, Type.COMMENT)],

        # Matchers for doc comments.
        JavaScriptModes.DOC_COMMENT_MODE: cls.COMMON_DOC_MATCHERS + [
            Matcher(cls.DOC_COMMENT_TEXT, Type.COMMENT)],

        JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: cls.COMMON_DOC_MATCHERS + [
            Matcher(cls.WHITESPACE, Type.COMMENT),
            Matcher(cls.DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)],

        # Matchers for single line comments.
        JavaScriptModes.LINE_COMMENT_MODE: [
            # We greedy match until the end of the line in line comment mode.
            Matcher(cls.ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)],

        # Matchers for code after the function keyword.
        JavaScriptModes.FUNCTION_MODE: [
            # Must match open paren before anything else and move into parameter
            # mode, otherwise everything inside the parameter list is parsed
            # incorrectly.
            Matcher(cls.OPENING_PAREN, Type.START_PARAMETERS,
                    JavaScriptModes.PARAMETER_MODE),
            Matcher(cls.WHITESPACE, Type.WHITESPACE),
            Matcher(cls.IDENTIFIER, Type.FUNCTION_NAME)],

        # Matchers for function parameters
        JavaScriptModes.PARAMETER_MODE: [
            # When in function parameter mode, a closing paren is treated
            # specially. Everything else is treated as lines of parameters.
            Matcher(cls.CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS,
                    JavaScriptModes.TEXT_MODE),
            Matcher(cls.PARAMETERS, Type.PARAMETERS,
                    JavaScriptModes.PARAMETER_MODE)]}

  def __init__(self, parse_js_doc=True):
    """Create a tokenizer object.

    Args:
      parse_js_doc: Whether to do detailed parsing of javascript doc comments,
          or simply treat them as normal comments.  Defaults to parsing JsDoc.
    """
    matchers = self.BuildMatchers()
    if not parse_js_doc:
      # Treat doc comments exactly like block comments.  BuildMatchers()
      # returns a freshly built dict on every call, so the entry can be
      # replaced directly without affecting any other tokenizer.  No
      # copy.deepcopy() is needed, and deep-copying compiled regular
      # expressions is not supported before Python 3.7.
      matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[
          JavaScriptModes.BLOCK_COMMENT_MODE]

    tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers,
                                 self.JAVASCRIPT_DEFAULT_TYPES)

  def _CreateToken(self, string, token_type, line, line_number, values=None):
    """Creates a new JavaScriptToken object.

    Args:
      string: The string of input the token contains.
      token_type: The type of token.
      line: The text of the line this token is in.
      line_number: The line number of the token.
      values: A dict of named values within the token.  For instance, a
        function declaration may have a value called 'name' which captures the
        name of the function.

    Returns:
      A javascripttokens.JavaScriptToken built from the arguments.
    """
    # line_number is deliberately passed twice.  NOTE(review): the final
    # positional argument is presumably the token's original line number;
    # confirm against the JavaScriptToken constructor.
    return javascripttokens.JavaScriptToken(string, token_type, line,
                                            line_number, values, line_number)
| OLD | NEW |