| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 // Generated by scripts/tokenizer_gen.py. | |
| 5 | |
| 6 | |
/** Anything that can hand out [Token]s one at a time through [next]. */
abstract class TokenSource {
  /** Produces the next token. */
  Token next();
}
| 10 | |
/**
 * One frame of string-interpolation state, linked to the frame that was
 * active before it through [previous].
 *
 * [depth] starts at -1 when the frame is created.
 */
class InterpStack {
  InterpStack next, previous;

  /** Character code of the quote that delimits the enclosing string. */
  final int quote;

  /** Whether the enclosing string is a multi-line string. */
  final bool isMultiline;

  int depth;

  InterpStack(this.previous, this.quote, this.isMultiline) : depth = -1;

  /** Returns the frame that was active before this one (may be null). */
  InterpStack pop() => previous;

  /**
   * Creates a new frame on top of [stack] (which may be null) and returns it.
   * The constructor already records [stack] as the new frame's [previous].
   */
  static InterpStack push(InterpStack stack, int quote, bool isMultiline) =>
      new InterpStack(stack, quote, isMultiline);
}
| 29 | |
| 30 /** | |
| 31 * The base class for our tokenizer. The hand coded parts are in this file, with | |
| 32 * the generated parts in the subclass Tokenizer. | |
| 33 */ | |
| 34 class TokenizerBase extends TokenizerHelpers implements TokenSource { | |
/** The source file whose text is being tokenized. */
final SourceFile _source;

/** When true, whitespace and comments are skipped instead of returned. */
final bool _skipWhitespace;

/** The text of [_source], captured once by the constructor. */
String _text;

/** Offset into [_text] of the next character to be read. */
int _index;

/**
 * Offset into [_text] used as the start of the token being built.
 * NOTE(review): never assigned in this class; presumably the generated
 * subclass sets it at the start of each token - confirm.
 */
int _startIndex;

/** Keeps track of string interpolation state. */
InterpStack _interpStack;

/**
 * Creates a tokenizer over [_source] that starts reading at [index]
 * (0 when omitted).
 */
TokenizerBase(this._source, this._skipWhitespace, [index = 0])
    : _index = index {
  this._text = this._source.text;
}

/** Scans and returns the next token. */
abstract Token next();

/** Returns the token kind to use for the identifier just scanned. */
abstract int getIdentifierKind();
| 52 | |
/**
 * Consumes and returns the code unit at the current position, or returns 0
 * without advancing when the end of the text has been reached.
 */
int _nextChar() {
  if (_index >= _text.length) return 0;
  final ch = _text.codeUnitAt(_index);
  _index++;
  return ch;
}
| 60 | |
/**
 * Returns the code unit at the current position without consuming it, or 0
 * at the end of the text.
 */
int _peekChar() => _index < _text.length ? _text.codeUnitAt(_index) : 0;
| 68 | |
/**
 * Consumes the current code unit if it equals [ch]. Returns whether a
 * character was consumed; always false at the end of the text.
 */
bool _maybeEatChar(int ch) {
  final matched = _index < _text.length && _text.codeUnitAt(_index) == ch;
  if (matched) _index++;
  return matched;
}
| 81 | |
/**
 * Returns the text between [_startIndex] and the current position, with the
 * end clamped to the length of the text.
 */
String _tokenText() {
  final end = _index < _text.length ? _index : _text.length;
  return _text.substring(_startIndex, end);
}
| 89 | |
/** Builds a token of [kind] spanning [_startIndex] to the current position. */
Token _finishToken(int kind) =>
    new Token(kind, _source, _startIndex, _index);
| 93 | |
/**
 * Builds an error token spanning [_startIndex] to the current position,
 * carrying the optional [message].
 */
Token _errorToken([String message = null]) =>
    new ErrorToken(TokenKind.ERROR, _source, _startIndex, _index, message);
| 98 | |
/**
 * Scans a run of whitespace (space, tab, CR, LF).
 *
 * Steps back one character first - presumably because the caller already
 * consumed the first whitespace character (confirm against the generated
 * tokenizer). When whitespace is not being skipped, a WHITESPACE token is
 * returned at each newline and at the first non-whitespace character.
 * When it is being skipped, scanning continues with [next]. Running off
 * the end of the text yields END_OF_FILE in both modes.
 */
Token finishWhitespace() {
  _index--;
  while (_index < _text.length) {
    final ch = _text.codeUnitAt(_index);
    if (ch == 10/*'\n'*/) {
      _index++;
      if (!_skipWhitespace) {
        return _finishToken(TokenKind.WHITESPACE); // note the newline?
      }
      continue;
    }
    if (ch != 32/*' '*/ && ch != 9/*'\t'*/ && ch != 13/*'\r'*/) {
      // First non-whitespace character: leave it unread for the next token.
      return _skipWhitespace ? next() : _finishToken(TokenKind.WHITESPACE);
    }
    _index++;
  }
  return _finishToken(TokenKind.END_OF_FILE);
}
| 121 | |
/**
 * Consumes characters up to and including the first LF or CR, or until
 * [_nextChar] reports 0. Returns a COMMENT token, or continues with
 * [next] when whitespace is being skipped.
 */
Token finishSingleLineComment() {
  int ch;
  do {
    ch = _nextChar();
  } while (ch != 0 && ch != 10/*'\n'*/ && ch != 13/*'\r'*/);

  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
| 134 | |
/**
 * Consumes a block comment, honouring nested comment openers and closers.
 * Starts at nesting level 1, so the opener appears to have been consumed
 * already by the caller. Returns an error token if the text ends before
 * the comment is closed.
 */
Token finishMultiLineComment() {
  var nesting = 1;
  while (nesting > 0) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == 42/*'*'*/ && _maybeEatChar(47/*'/'*/)) {
      nesting--;
    } else if (ch == 47/*'/'*/ && _maybeEatChar(42/*'*'*/)) {
      nesting++;
    }
  }

  return _skipWhitespace ? next() : _finishToken(TokenKind.COMMENT);
}
| 158 | |
/** Advances past every consecutive digit at the current position. */
void eatDigits() {
  while (_index < _text.length &&
         TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
    _index++;
  }
}
| 168 | |
/**
 * Returns the numeric value (0-15) of the hexadecimal digit whose character
 * code is [c], accepting both letter cases, or -1 if [c] is not a hex digit.
 */
static int _hexDigit(int c) {
  if (48/*0*/ <= c && c <= 57/*9*/) return c - 48;
  if (97/*a*/ <= c && c <= 102/*f*/) return c - 97 + 10;
  if (65/*A*/ <= c && c <= 70/*F*/) return c - 65 + 10;
  return -1;
}
| 180 | |
/**
 * Reads hexadecimal digits starting at the current position and returns
 * their value.
 *
 * With no [hexLength], reads as many hex digits as are available (possibly
 * none, giving 0) and stops at the first non-digit or at the end of the
 * text. With a [hexLength], exactly that many digits are required: -1 is
 * returned if the text is too short or a non-digit is found first. In the
 * non-digit case the digits already read stay consumed.
 */
int readHex([int hexLength]) {
  // Exclusive upper bound for the scan.
  int maxIndex;
  if (hexLength == null) {
    // Scan to the very end of the text; stopping one short would drop the
    // final digit of a literal that ends the input.
    maxIndex = _text.length;
  } else {
    maxIndex = _index + hexLength;
    // The digits may end exactly at the end of the text; only a range that
    // runs past it is too long.
    if (maxIndex > _text.length) return -1;
  }

  var result = 0;
  while (_index < maxIndex) {
    final digit = _hexDigit(_text.codeUnitAt(_index));
    if (digit == -1) {
      return hexLength == null ? result : -1;
    }
    // Multiply by 16 rather than shift by 4 since that will result in a
    // correct value for numbers that exceed the 32 bit precision of JS
    // 'integers'.
    // TODO: Figure out a better solution to integer truncation. Issue 638.
    result = (result * 16) + digit;
    _index++;
  }

  return result;
}
| 211 | |
/**
 * Scans the rest of a number. A '.' is treated as part of the number only
 * when a digit follows it, so that something like `1.toString()` leaves the
 * dot for the next token.
 */
Token finishNumber() {
  eatDigits();

  if (_peekChar() != 46/*.*/) {
    return finishNumberExtra(TokenKind.INTEGER);
  }

  // Tentatively take the dot, then look at what follows it.
  _nextChar();
  if (!TokenizerHelpers.isDigit(_peekChar())) {
    // Handle the case of 1.toString(): give the dot back.
    _index--;
    return finishNumberExtra(TokenKind.INTEGER);
  }

  eatDigits();
  return finishNumberExtra(TokenKind.DOUBLE);
}
| 228 | |
/**
 * Scans an optional exponent after the digits of a number and produces the
 * final token.
 *
 * [kind] is the kind decided so far; an exponent always makes the result a
 * DOUBLE. An exponent takes at most one sign and must contain at least one
 * digit, otherwise an error token is returned. An error token is also
 * returned when the number is immediately followed by an identifier start
 * character.
 */
Token finishNumberExtra(int kind) {
  if (_maybeEatChar(101/*e*/) || _maybeEatChar(69/*E*/)) {
    kind = TokenKind.DOUBLE;
    // Accept a single optional sign; "1e-+5" must not scan as one number.
    if (!_maybeEatChar(45/*-*/)) {
      _maybeEatChar(43/*+*/);
    }
    final exponentStart = _index;
    eatDigits();
    if (_index == exponentStart) {
      return _errorToken("missing digits in exponent");
    }
  }
  if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
    // Consume the offending character so it is part of the error span.
    _nextChar();
    return _errorToken("illegal character in number");
  }

  return _finishToken(kind);
}
| 243 | |
/**
 * Builds a literal token whose value is the string made from the character
 * codes in [buf]. The kind is STRING_PART when [isPart] is true, otherwise
 * STRING.
 */
Token _makeStringToken(List<int> buf, bool isPart) {
  var kind = TokenKind.STRING;
  if (isPart) kind = TokenKind.STRING_PART;
  return new LiteralToken(
      kind, _source, _startIndex, _index, new String.fromCharCodes(buf));
}
| 249 | |
/**
 * Builds a STRING literal token for a raw string by slicing its value
 * directly out of the source text.
 *
 * For multi-line strings the value starts 4 characters after the token
 * start (one more if that character is a newline) and ends 3 characters
 * before the current position. Otherwise the offsets are 2 and 1.
 * NOTE(review): these offsets presumably cover a one-character raw prefix
 * plus the quotes - confirm against the generated tokenizer.
 */
Token _makeRawStringToken(bool isMultiline) {
  final text = _source.text;
  int from, to;
  if (isMultiline) {
    from = _startIndex + 4;
    // Skip initial newline in multiline strings
    if (text[from] == '\n') from++;
    to = _index - 3;
  } else {
    from = _startIndex + 2;
    to = _index - 1;
  }
  return new LiteralToken(
      TokenKind.STRING, _source, _startIndex, _index,
      text.substring(from, to));
}
| 262 | |
/**
 * Scans the body of a multi-line string delimited by three [quote]
 * characters.
 *
 * Returns a STRING token at the closing triple quote, a STRING_PART token
 * (after pushing a new interpolation frame) at a '$', and an error token at
 * the end of the text or on an invalid escape. One or two quotes in a row
 * are ordinary content.
 */
Token finishMultilineString(int quote) {
  final chars = <int>[];
  for (;;) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();

    if (ch == quote) {
      if (_maybeEatChar(quote)) {
        if (_maybeEatChar(quote)) {
          return _makeStringToken(chars, false);
        }
        // Two quotes that were not followed by a third.
        chars.add(quote);
      }
      chars.add(quote);
      continue;
    }

    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, true);
      return _makeStringToken(chars, true);
    }

    if (ch == 92/*\*/) {
      final escaped = readEscapeSequence();
      if (escaped == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escaped);
      continue;
    }

    chars.add(ch);
  }
}
| 293 | |
/**
 * Produces an LBRACE token. While an interpolation frame is active, its
 * brace depth is updated: a depth of -1 becomes 1, any other depth is
 * incremented.
 */
Token _finishOpenBrace() {
  final frame = _interpStack;
  if (frame != null) {
    if (frame.depth != -1) {
      assert(frame.depth >= 0);
      frame.depth++;
    } else {
      frame.depth = 1;
    }
  }
  return _finishToken(TokenKind.LBRACE);
}
| 305 | |
/**
 * Produces an RBRACE token. While an interpolation frame is active, its
 * brace depth is decremented first.
 */
Token _finishCloseBrace() {
  final frame = _interpStack;
  if (frame != null) {
    frame.depth = frame.depth - 1;
    assert(frame.depth >= 0);
  }
  return _finishToken(TokenKind.RBRACE);
}
| 313 | |
/**
 * Decides what kind of string follows an opening [quote]: an empty string
 * (a second quote follows), a multi-line string (two more quotes follow),
 * or an ordinary single-quoted body.
 */
Token finishString(int quote) {
  if (!_maybeEatChar(quote)) {
    return finishStringBody(quote);
  }
  if (!_maybeEatChar(quote)) {
    // Exactly two quotes: the empty string.
    return _makeStringToken(<int>[], false);
  }
  // skip an initial newline
  _maybeEatChar(10/*'\n'*/);
  return finishMultilineString(quote);
}
| 326 | |
/**
 * Scans a raw string after its opening [quote]. Handles the empty string
 * and hands off to [finishMultilineRawString] for triple-quoted strings.
 * Otherwise it consumes characters up to the closing quote. Returns an
 * error token if the text ends first.
 */
Token finishRawString(int quote) {
  if (_maybeEatChar(quote)) {
    return _maybeEatChar(quote)
        ? finishMultilineRawString(quote)
        : _makeStringToken(<int>[], false);
  }

  int ch;
  do {
    ch = _nextChar();
    if (ch == quote) return _makeRawStringToken(false);
  } while (ch != 0);

  return _errorToken();
}
| 344 | |
/**
 * Consumes the body of a triple-quoted raw string up to and including the
 * closing three [quote] characters. Returns an error token if the text ends
 * first.
 */
Token finishMultilineRawString(int quote) {
  for (;;) {
    final ch = _nextChar();
    if (ch == 0) return _errorToken();
    if (ch != quote) continue;

    if (_maybeEatChar(quote) && _maybeEatChar(quote)) {
      return _makeRawStringToken(true);
    }
  }
}
| 355 | |
/**
 * Scans the body of a single-quoted (non-raw) string.
 *
 * Returns a STRING token at the closing [quote], a STRING_PART token (after
 * pushing a new interpolation frame) at a '$', and an error token at the end
 * of the text or on an invalid escape.
 */
Token finishStringBody(int quote) {
  final chars = <int>[];
  for (;;) {
    final ch = _nextChar();

    if (ch == quote) {
      return _makeStringToken(chars, false);
    }

    if (ch == 36/*$*/) {
      // start of string interp
      _interpStack = InterpStack.push(_interpStack, quote, false);
      return _makeStringToken(chars, true);
    }

    if (ch == 0) {
      return _errorToken();
    }

    if (ch == 92/*\*/) {
      final escaped = readEscapeSequence();
      if (escaped == -1) {
        return _errorToken("invalid hex escape sequence");
      }
      chars.add(escaped);
      continue;
    }

    chars.add(ch);
  }
}
| 380 | |
/**
 * Reads the escape sequence that follows a backslash and returns the
 * character code it denotes, or -1 if the sequence is invalid.
 *
 * Recognizes \n \r \f \b \t \v, \xHH, \uHHHH and \u{H...}. Any other
 * character stands for itself. Hex escapes that name a UTF-16 surrogate
 * half (U+D800-U+DFFF) or a value above U+10FFFF are rejected with -1.
 * Values above U+FFFF are not supported yet and are reported through
 * world.fatal.
 */
int readEscapeSequence() {
  final ch = _nextChar();
  int hexValue;
  switch (ch) {
    case 110/*n*/:
      return 0x0a/*'\n'*/;
    case 114/*r*/:
      return 0x0d/*'\r'*/;
    case 102/*f*/:
      return 0x0c/*'\f'*/;
    case 98/*b*/:
      return 0x08/*'\b'*/;
    case 116/*t*/:
      return 0x09/*'\t'*/;
    case 118/*v*/:
      return 0x0b/*'\v'*/;
    case 120/*x*/:
      hexValue = readHex(2);
      break;
    case 117/*u*/:
      if (_maybeEatChar(123/*{*/)) {
        hexValue = readHex();
        if (!_maybeEatChar(125/*}*/)) {
          return -1;
        }
      } else {
        hexValue = readHex(4);
      }
      break;
    default: return ch;
  }

  if (hexValue == -1) return -1;

  // According to the Unicode standard the high and low surrogate halves
  // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
  // are not legal Unicode values.
  if (hexValue >= 0xD800 && hexValue <= 0xDFFF) {
    // Surrogate halves are invalid, not merely unimplemented.
    return -1;
  }
  if (hexValue <= 0xFFFF) {
    return hexValue;
  }
  if (hexValue <= 0x10FFFF) {
    world.fatal('unicode values greater than 2 bytes not implemented yet');
    return -1;
  }
  return -1;
}
| 429 | |
/**
 * Finishes a token that began with '.' (assumed already consumed by the
 * caller): a DOUBLE such as `.5` when a digit follows, otherwise a DOT.
 */
Token finishDot() {
  if (!TokenizerHelpers.isDigit(_peekChar())) {
    return _finishToken(TokenKind.DOT);
  }
  eatDigits();
  return finishNumberExtra(TokenKind.DOUBLE);
}
| 438 | |
/**
 * Consumes the remaining characters of an identifier and returns a token
 * whose kind is chosen by [getIdentifierKind].
 *
 * If an interpolation frame is active and still at its initial depth of
 * -1, its depth is set to 0 and the stricter
 * TokenizerHelpers.isInterpIdentifierPart test decides where the identifier
 * ends. Otherwise TokenizerHelpers.isIdentifierPart is used.
 */
Token finishIdentifier() {
  final inInterpolation = _interpStack != null && _interpStack.depth == -1;
  if (inInterpolation) {
    _interpStack.depth = 0;
  }

  while (_index < _text.length) {
    final ch = _text.codeUnitAt(_index);
    final isPart = inInterpolation
        ? TokenizerHelpers.isInterpIdentifierPart(ch)
        : TokenizerHelpers.isIdentifierPart(ch);
    if (!isPart) break;
    _index++;
  }

  return _finishToken(getIdentifierKind());
}
| 464 } | |
| 465 | |
| OLD | NEW |