| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 30 matching lines...) Expand all Loading... |
| 41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; |
| 42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; |
| 43 | 43 |
| 44 | 44 |
| 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; |
| 46 | 46 |
| 47 | 47 |
| 48 // ---------------------------------------------------------------------------- | 48 // ---------------------------------------------------------------------------- |
| 49 // UTF8Buffer | 49 // UTF8Buffer |
| 50 | 50 |
| 51 UTF8Buffer::UTF8Buffer() : data_(NULL) { | 51 UTF8Buffer::UTF8Buffer() { |
| 52 Initialize(NULL, 0); | 52 static const int kInitialCapacity = 1 * KB; |
| 53 data_ = NewArray<char>(kInitialCapacity); |
| 54 limit_ = ComputeLimit(data_, kInitialCapacity); |
| 55 Reset(); |
| 56 ASSERT(Capacity() == kInitialCapacity && pos() == 0); |
| 53 } | 57 } |
| 54 | 58 |
| 55 | 59 |
| 56 UTF8Buffer::~UTF8Buffer() { | 60 UTF8Buffer::~UTF8Buffer() { |
| 57 DeleteArray(data_); | 61 DeleteArray(data_); |
| 58 } | 62 } |
| 59 | 63 |
| 60 | 64 |
| 61 void UTF8Buffer::Initialize(char* src, int length) { | 65 void UTF8Buffer::AddCharSlow(uc32 c) { |
| 62 DeleteArray(data_); | 66 static const int kCapacityGrowthLimit = 1 * MB; |
| 63 data_ = src; | 67 if (cursor_ > limit_) { |
| 64 size_ = length; | 68 int old_capacity = Capacity(); |
| 65 Reset(); | 69 int old_position = pos(); |
| 70 int new_capacity = |
| 71 Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); |
| 72 char* new_data = NewArray<char>(new_capacity); |
| 73 memcpy(new_data, data_, old_position); |
| 74 DeleteArray(data_); |
| 75 data_ = new_data; |
| 76 cursor_ = new_data + old_position; |
| 77 limit_ = ComputeLimit(new_data, new_capacity); |
| 78 ASSERT(Capacity() == new_capacity && pos() == old_position); |
| 79 } |
| 80 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 81 *cursor_++ = c; // Common case: 7-bit ASCII. |
| 82 } else { |
| 83 cursor_ += unibrow::Utf8::Encode(cursor_, c); |
| 84 } |
| 85 ASSERT(pos() <= Capacity()); |
| 66 } | 86 } |
| 67 | 87 |
| 68 | 88 |
| 69 void UTF8Buffer::AddChar(uc32 c) { | |
| 70 const int min_size = 1024; | |
| 71 if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) { | |
| 72 int new_size = size_ * 2; | |
| 73 if (new_size < min_size) { | |
| 74 new_size = min_size; | |
| 75 } | |
| 76 char* new_data = NewArray<char>(new_size); | |
| 77 memcpy(new_data, data_, pos_); | |
| 78 DeleteArray(data_); | |
| 79 data_ = new_data; | |
| 80 size_ = new_size; | |
| 81 } | |
| 82 if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) { | |
| 83 data_[pos_++] = c; // common case: 7bit ASCII | |
| 84 } else { | |
| 85 pos_ += unibrow::Utf8::Encode(&data_[pos_], c); | |
| 86 } | |
| 87 ASSERT(pos_ <= size_); | |
| 88 } | |
| 89 | |
| 90 | |
| 91 // ---------------------------------------------------------------------------- | 89 // ---------------------------------------------------------------------------- |
| 92 // UTF16Buffer | 90 // UTF16Buffer |
| 93 | 91 |
| 94 | 92 |
| 95 UTF16Buffer::UTF16Buffer() | 93 UTF16Buffer::UTF16Buffer() |
| 96 : pos_(0), | 94 : pos_(0), |
| 97 pushback_buffer_(0), | 95 pushback_buffer_(0), |
| 98 last_(0), | 96 last_(0), |
| 99 stream_(NULL) { } | 97 stream_(NULL) { } |
| 100 | 98 |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 165 source_.Initialize(source, stream); | 163 source_.Initialize(source, stream); |
| 166 position_ = position; | 164 position_ = position; |
| 167 | 165 |
| 168 // Reset literals buffer | 166 // Reset literals buffer |
| 169 literals_.Reset(); | 167 literals_.Reset(); |
| 170 | 168 |
| 171 // Set c0_ (one character ahead) | 169 // Set c0_ (one character ahead) |
| 172 ASSERT(kCharacterLookaheadBufferSize == 1); | 170 ASSERT(kCharacterLookaheadBufferSize == 1); |
| 173 Advance(); | 171 Advance(); |
| 174 | 172 |
| 175 // Skip initial whitespace (allowing HTML comment ends) and scan | 173 // Skip initial whitespace allowing HTML comment ends just like |
| 176 // first token. | 174 // after a newline and scan first token. |
| 177 SkipWhiteSpace(true); | 175 has_line_terminator_before_next_ = true; |
| 176 SkipWhiteSpace(); |
| 178 Scan(); | 177 Scan(); |
| 179 } | 178 } |
| 180 | 179 |
| 181 | 180 |
| 182 Handle<String> Scanner::SubString(int start, int end) { | 181 Handle<String> Scanner::SubString(int start, int end) { |
| 183 return source_.SubString(start - position_, end - position_); | 182 return source_.SubString(start - position_, end - position_); |
| 184 } | 183 } |
| 185 | 184 |
| 186 | 185 |
| 187 Token::Value Scanner::Next() { | 186 Token::Value Scanner::Next() { |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 239 // Unicode character; this implies that in a Unicode context the | 238 // Unicode character; this implies that in a Unicode context the |
| 240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 239 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 241 // character expressed in little-endian byte order (since it could | 240 // character expressed in little-endian byte order (since it could |
| 242 // not be a U+FFFE character expressed in big-endian byte | 241 // not be a U+FFFE character expressed in big-endian byte |
| 243 // order). Nevertheless, we check for it to be compatible with | 242 // order). Nevertheless, we check for it to be compatible with |
| 244 // Spidermonkey. | 243 // Spidermonkey. |
| 245 return c == 0xFEFF || c == 0xFFFE; | 244 return c == 0xFEFF || c == 0xFFFE; |
| 246 } | 245 } |
| 247 | 246 |
| 248 | 247 |
| 249 void Scanner::SkipWhiteSpace(bool initial) { | 248 bool Scanner::SkipWhiteSpace() { |
| 250 has_line_terminator_before_next_ = initial; | 249 int start_position = source_pos(); |
| 251 | 250 |
| 252 while (true) { | 251 while (true) { |
| 253 // We treat byte-order marks (BOMs) as whitespace for better | 252 // We treat byte-order marks (BOMs) as whitespace for better |
| 254 // compatibility with Spidermonkey and other JavaScript engines. | 253 // compatibility with Spidermonkey and other JavaScript engines. |
| 255 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 254 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
| 256 // IsWhiteSpace() includes line terminators! | 255 // IsWhiteSpace() includes line terminators! |
| 257 if (kIsLineTerminator.get(c0_)) | 256 if (kIsLineTerminator.get(c0_)) { |
| 258 // Ignore line terminators, but remember them. This is necessary | 257 // Ignore line terminators, but remember them. This is necessary |
| 259 // for automatic semicolon insertion. | 258 // for automatic semicolon insertion. |
| 260 has_line_terminator_before_next_ = true; | 259 has_line_terminator_before_next_ = true; |
| 260 } |
| 261 Advance(); | 261 Advance(); |
| 262 } | 262 } |
| 263 | 263 |
| 264 // If there is an HTML comment end '-->' at the beginning of a | 264 // If there is an HTML comment end '-->' at the beginning of a |
| 265 // line (with only whitespace in front of it), we treat the rest | 265 // line (with only whitespace in front of it), we treat the rest |
| 266 // of the line as a comment. This is in line with the way | 266 // of the line as a comment. This is in line with the way |
| 267 // SpiderMonkey handles it. | 267 // SpiderMonkey handles it. |
| 268 if (c0_ == '-' && has_line_terminator_before_next_) { | 268 if (c0_ == '-' && has_line_terminator_before_next_) { |
| 269 Advance(); | 269 Advance(); |
| 270 if (c0_ == '-') { | 270 if (c0_ == '-') { |
| 271 Advance(); | 271 Advance(); |
| 272 if (c0_ == '>') { | 272 if (c0_ == '>') { |
| 273 // Treat the rest of the line as a comment. | 273 // Treat the rest of the line as a comment. |
| 274 SkipSingleLineComment(); | 274 SkipSingleLineComment(); |
| 275 // Continue skipping white space after the comment. | 275 // Continue skipping white space after the comment. |
| 276 continue; | 276 continue; |
| 277 } | 277 } |
| 278 PushBack('-'); // undo Advance() | 278 PushBack('-'); // undo Advance() |
| 279 } | 279 } |
| 280 PushBack('-'); // undo Advance() | 280 PushBack('-'); // undo Advance() |
| 281 } | 281 } |
| 282 return; | 282 // Return whether or not we skipped any characters. |
| 283 return source_pos() != start_position; |
| 283 } | 284 } |
| 284 } | 285 } |
| 285 | 286 |
| 286 | 287 |
| 287 Token::Value Scanner::SkipSingleLineComment() { | 288 Token::Value Scanner::SkipSingleLineComment() { |
| 288 Advance(); | 289 Advance(); |
| 289 | 290 |
| 290 // The line terminator at the end of the line is not considered | 291 // The line terminator at the end of the line is not considered |
| 291 // to be part of the single-line comment; it is recognized | 292 // to be part of the single-line comment; it is recognized |
| 292 // separately by the lexical grammar and becomes part of the | 293 // separately by the lexical grammar and becomes part of the |
| 293 // stream of input elements for the syntactic grammar (see | 294 // stream of input elements for the syntactic grammar (see |
| 294 // ECMA-262, section 7.4, page 12). | 295 // ECMA-262, section 7.4, page 12). |
| 295 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 296 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
| 296 Advance(); | 297 Advance(); |
| 297 } | 298 } |
| 298 | 299 |
| 299 return Token::COMMENT; | 300 return Token::WHITESPACE; |
| 300 } | 301 } |
| 301 | 302 |
| 302 | 303 |
| 303 Token::Value Scanner::SkipMultiLineComment() { | 304 Token::Value Scanner::SkipMultiLineComment() { |
| 304 ASSERT(c0_ == '*'); | 305 ASSERT(c0_ == '*'); |
| 305 Advance(); | 306 Advance(); |
| 306 | 307 |
| 307 while (c0_ >= 0) { | 308 while (c0_ >= 0) { |
| 308 char ch = c0_; | 309 char ch = c0_; |
| 309 Advance(); | 310 Advance(); |
| 310 // If we have reached the end of the multi-line comment, we | 311 // If we have reached the end of the multi-line comment, we |
| 311 // consume the '/' and insert a whitespace. This way all | 312 // consume the '/' and insert a whitespace. This way all |
| 312 // multi-line comments are treated as whitespace - even the ones | 313 // multi-line comments are treated as whitespace - even the ones |
| 313 // containing line terminators. This contradicts ECMA-262, section | 314 // containing line terminators. This contradicts ECMA-262, section |
| 314 // 7.4, page 12, that says that multi-line comments containing | 315 // 7.4, page 12, that says that multi-line comments containing |
| 315 // line terminators should be treated as a line terminator, but it | 316 // line terminators should be treated as a line terminator, but it |
| 316 // matches the behaviour of SpiderMonkey and KJS. | 317 // matches the behaviour of SpiderMonkey and KJS. |
| 317 if (ch == '*' && c0_ == '/') { | 318 if (ch == '*' && c0_ == '/') { |
| 318 c0_ = ' '; | 319 c0_ = ' '; |
| 319 return Token::COMMENT; | 320 return Token::WHITESPACE; |
| 320 } | 321 } |
| 321 } | 322 } |
| 322 | 323 |
| 323 // Unterminated multi-line comment. | 324 // Unterminated multi-line comment. |
| 324 return Token::ILLEGAL; | 325 return Token::ILLEGAL; |
| 325 } | 326 } |
| 326 | 327 |
| 327 | 328 |
| 328 Token::Value Scanner::ScanHtmlComment() { | 329 Token::Value Scanner::ScanHtmlComment() { |
| 329 // Check for <!-- comments. | 330 // Check for <!-- comments. |
| 330 ASSERT(c0_ == '!'); | 331 ASSERT(c0_ == '!'); |
| 331 Advance(); | 332 Advance(); |
| 332 if (c0_ == '-') { | 333 if (c0_ == '-') { |
| 333 Advance(); | 334 Advance(); |
| 334 if (c0_ == '-') return SkipSingleLineComment(); | 335 if (c0_ == '-') return SkipSingleLineComment(); |
| 335 PushBack('-'); // undo Advance() | 336 PushBack('-'); // undo Advance() |
| 336 } | 337 } |
| 337 PushBack('!'); // undo Advance() | 338 PushBack('!'); // undo Advance() |
| 338 ASSERT(c0_ == '!'); | 339 ASSERT(c0_ == '!'); |
| 339 return Token::LT; | 340 return Token::LT; |
| 340 } | 341 } |
| 341 | 342 |
| 342 | 343 |
| 343 void Scanner::Scan() { | 344 void Scanner::Scan() { |
| 344 Token::Value token; | 345 Token::Value token; |
| 345 bool has_line_terminator = false; | 346 has_line_terminator_before_next_ = false; |
| 346 do { | 347 do { |
| 347 SkipWhiteSpace(has_line_terminator); | |
| 348 | |
| 349 // Remember the line terminator in previous loop | |
| 350 has_line_terminator = has_line_terminator_before_next(); | |
| 351 | |
| 352 // Remember the position of the next token | 348 // Remember the position of the next token |
| 353 next_.location.beg_pos = source_pos(); | 349 next_.location.beg_pos = source_pos(); |
| 354 | 350 |
| 355 token = ScanToken(); | 351 switch (c0_) { |
| 356 } while (token == Token::COMMENT); | 352 case ' ': |
| 353 case '\t': |
| 354 Advance(); |
| 355 token = Token::WHITESPACE; |
| 356 break; |
| 357 |
| 358 case '\n': |
| 359 Advance(); |
| 360 has_line_terminator_before_next_ = true; |
| 361 token = Token::WHITESPACE; |
| 362 break; |
| 363 |
| 364 case '"': case '\'': |
| 365 token = ScanString(); |
| 366 break; |
| 367 |
| 368 case '<': |
| 369 // < <= << <<= <!-- |
| 370 Advance(); |
| 371 if (c0_ == '=') { |
| 372 token = Select(Token::LTE); |
| 373 } else if (c0_ == '<') { |
| 374 token = Select('=', Token::ASSIGN_SHL, Token::SHL); |
| 375 } else if (c0_ == '!') { |
| 376 token = ScanHtmlComment(); |
| 377 } else { |
| 378 token = Token::LT; |
| 379 } |
| 380 break; |
| 381 |
| 382 case '>': |
| 383 // > >= >> >>= >>> >>>= |
| 384 Advance(); |
| 385 if (c0_ == '=') { |
| 386 token = Select(Token::GTE); |
| 387 } else if (c0_ == '>') { |
| 388 // >> >>= >>> >>>= |
| 389 Advance(); |
| 390 if (c0_ == '=') { |
| 391 token = Select(Token::ASSIGN_SAR); |
| 392 } else if (c0_ == '>') { |
| 393 token = Select('=', Token::ASSIGN_SHR, Token::SHR); |
| 394 } else { |
| 395 token = Token::SAR; |
| 396 } |
| 397 } else { |
| 398 token = Token::GT; |
| 399 } |
| 400 break; |
| 401 |
| 402 case '=': |
| 403 // = == === |
| 404 Advance(); |
| 405 if (c0_ == '=') { |
| 406 token = Select('=', Token::EQ_STRICT, Token::EQ); |
| 407 } else { |
| 408 token = Token::ASSIGN; |
| 409 } |
| 410 break; |
| 411 |
| 412 case '!': |
| 413 // ! != !== |
| 414 Advance(); |
| 415 if (c0_ == '=') { |
| 416 token = Select('=', Token::NE_STRICT, Token::NE); |
| 417 } else { |
| 418 token = Token::NOT; |
| 419 } |
| 420 break; |
| 421 |
| 422 case '+': |
| 423 // + ++ += |
| 424 Advance(); |
| 425 if (c0_ == '+') { |
| 426 token = Select(Token::INC); |
| 427 } else if (c0_ == '=') { |
| 428 token = Select(Token::ASSIGN_ADD); |
| 429 } else { |
| 430 token = Token::ADD; |
| 431 } |
| 432 break; |
| 433 |
| 434 case '-': |
| 435 // - -- --> -= |
| 436 Advance(); |
| 437 if (c0_ == '-') { |
| 438 Advance(); |
| 439 if (c0_ == '>' && has_line_terminator_before_next_) { |
| 440 // For compatibility with SpiderMonkey, we skip lines that |
| 441 // start with an HTML comment end '-->'. |
| 442 token = SkipSingleLineComment(); |
| 443 } else { |
| 444 token = Token::DEC; |
| 445 } |
| 446 } else if (c0_ == '=') { |
| 447 token = Select(Token::ASSIGN_SUB); |
| 448 } else { |
| 449 token = Token::SUB; |
| 450 } |
| 451 break; |
| 452 |
| 453 case '*': |
| 454 // * *= |
| 455 token = Select('=', Token::ASSIGN_MUL, Token::MUL); |
| 456 break; |
| 457 |
| 458 case '%': |
| 459 // % %= |
| 460 token = Select('=', Token::ASSIGN_MOD, Token::MOD); |
| 461 break; |
| 462 |
| 463 case '/': |
| 464 // / // /* /= |
| 465 Advance(); |
| 466 if (c0_ == '/') { |
| 467 token = SkipSingleLineComment(); |
| 468 } else if (c0_ == '*') { |
| 469 token = SkipMultiLineComment(); |
| 470 } else if (c0_ == '=') { |
| 471 token = Select(Token::ASSIGN_DIV); |
| 472 } else { |
| 473 token = Token::DIV; |
| 474 } |
| 475 break; |
| 476 |
| 477 case '&': |
| 478 // & && &= |
| 479 Advance(); |
| 480 if (c0_ == '&') { |
| 481 token = Select(Token::AND); |
| 482 } else if (c0_ == '=') { |
| 483 token = Select(Token::ASSIGN_BIT_AND); |
| 484 } else { |
| 485 token = Token::BIT_AND; |
| 486 } |
| 487 break; |
| 488 |
| 489 case '|': |
| 490 // | || |= |
| 491 Advance(); |
| 492 if (c0_ == '|') { |
| 493 token = Select(Token::OR); |
| 494 } else if (c0_ == '=') { |
| 495 token = Select(Token::ASSIGN_BIT_OR); |
| 496 } else { |
| 497 token = Token::BIT_OR; |
| 498 } |
| 499 break; |
| 500 |
| 501 case '^': |
| 502 // ^ ^= |
| 503 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); |
| 504 break; |
| 505 |
| 506 case '.': |
| 507 // . Number |
| 508 Advance(); |
| 509 if (IsDecimalDigit(c0_)) { |
| 510 token = ScanNumber(true); |
| 511 } else { |
| 512 token = Token::PERIOD; |
| 513 } |
| 514 break; |
| 515 |
| 516 case ':': |
| 517 token = Select(Token::COLON); |
| 518 break; |
| 519 |
| 520 case ';': |
| 521 token = Select(Token::SEMICOLON); |
| 522 break; |
| 523 |
| 524 case ',': |
| 525 token = Select(Token::COMMA); |
| 526 break; |
| 527 |
| 528 case '(': |
| 529 token = Select(Token::LPAREN); |
| 530 break; |
| 531 |
| 532 case ')': |
| 533 token = Select(Token::RPAREN); |
| 534 break; |
| 535 |
| 536 case '[': |
| 537 token = Select(Token::LBRACK); |
| 538 break; |
| 539 |
| 540 case ']': |
| 541 token = Select(Token::RBRACK); |
| 542 break; |
| 543 |
| 544 case '{': |
| 545 token = Select(Token::LBRACE); |
| 546 break; |
| 547 |
| 548 case '}': |
| 549 token = Select(Token::RBRACE); |
| 550 break; |
| 551 |
| 552 case '?': |
| 553 token = Select(Token::CONDITIONAL); |
| 554 break; |
| 555 |
| 556 case '~': |
| 557 token = Select(Token::BIT_NOT); |
| 558 break; |
| 559 |
| 560 default: |
| 561 if (kIsIdentifierStart.get(c0_)) { |
| 562 token = ScanIdentifier(); |
| 563 } else if (IsDecimalDigit(c0_)) { |
| 564 token = ScanNumber(false); |
| 565 } else if (SkipWhiteSpace()) { |
| 566 token = Token::WHITESPACE; |
| 567 } else if (c0_ < 0) { |
| 568 token = Token::EOS; |
| 569 } else { |
| 570 token = Select(Token::ILLEGAL); |
| 571 } |
| 572 break; |
| 573 } |
| 574 |
| 575 // Continue scanning for tokens as long as we're just skipping |
| 576 // whitespace. |
| 577 } while (token == Token::WHITESPACE); |
| 357 | 578 |
| 358 next_.location.end_pos = source_pos(); | 579 next_.location.end_pos = source_pos(); |
| 359 next_.token = token; | 580 next_.token = token; |
| 360 } | 581 } |
| 361 | 582 |
| 362 | 583 |
| 363 void Scanner::SeekForward(int pos) { | 584 void Scanner::SeekForward(int pos) { |
| 364 source_.SeekForward(pos - 1); | 585 source_.SeekForward(pos - 1); |
| 365 Advance(); | 586 Advance(); |
| 366 Scan(); | 587 Scan(); |
| (...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 488 Advance(); | 709 Advance(); |
| 489 if (c0_ == next) { | 710 if (c0_ == next) { |
| 490 Advance(); | 711 Advance(); |
| 491 return then; | 712 return then; |
| 492 } else { | 713 } else { |
| 493 return else_; | 714 return else_; |
| 494 } | 715 } |
| 495 } | 716 } |
| 496 | 717 |
| 497 | 718 |
| 498 Token::Value Scanner::ScanToken() { | |
| 499 switch (c0_) { | |
| 500 // strings | |
| 501 case '"': case '\'': | |
| 502 return ScanString(); | |
| 503 | |
| 504 case '<': | |
| 505 // < <= << <<= <!-- | |
| 506 Advance(); | |
| 507 if (c0_ == '=') return Select(Token::LTE); | |
| 508 if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL); | |
| 509 if (c0_ == '!') return ScanHtmlComment(); | |
| 510 return Token::LT; | |
| 511 | |
| 512 case '>': | |
| 513 // > >= >> >>= >>> >>>= | |
| 514 Advance(); | |
| 515 if (c0_ == '=') return Select(Token::GTE); | |
| 516 if (c0_ == '>') { | |
| 517 // >> >>= >>> >>>= | |
| 518 Advance(); | |
| 519 if (c0_ == '=') return Select(Token::ASSIGN_SAR); | |
| 520 if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR); | |
| 521 return Token::SAR; | |
| 522 } | |
| 523 return Token::GT; | |
| 524 | |
| 525 case '=': | |
| 526 // = == === | |
| 527 Advance(); | |
| 528 if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ); | |
| 529 return Token::ASSIGN; | |
| 530 | |
| 531 case '!': | |
| 532 // ! != !== | |
| 533 Advance(); | |
| 534 if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE); | |
| 535 return Token::NOT; | |
| 536 | |
| 537 case '+': | |
| 538 // + ++ += | |
| 539 Advance(); | |
| 540 if (c0_ == '+') return Select(Token::INC); | |
| 541 if (c0_ == '=') return Select(Token::ASSIGN_ADD); | |
| 542 return Token::ADD; | |
| 543 | |
| 544 case '-': | |
| 545 // - -- -= | |
| 546 Advance(); | |
| 547 if (c0_ == '-') return Select(Token::DEC); | |
| 548 if (c0_ == '=') return Select(Token::ASSIGN_SUB); | |
| 549 return Token::SUB; | |
| 550 | |
| 551 case '*': | |
| 552 // * *= | |
| 553 return Select('=', Token::ASSIGN_MUL, Token::MUL); | |
| 554 | |
| 555 case '%': | |
| 556 // % %= | |
| 557 return Select('=', Token::ASSIGN_MOD, Token::MOD); | |
| 558 | |
| 559 case '/': | |
| 560 // / // /* /= | |
| 561 Advance(); | |
| 562 if (c0_ == '/') return SkipSingleLineComment(); | |
| 563 if (c0_ == '*') return SkipMultiLineComment(); | |
| 564 if (c0_ == '=') return Select(Token::ASSIGN_DIV); | |
| 565 return Token::DIV; | |
| 566 | |
| 567 case '&': | |
| 568 // & && &= | |
| 569 Advance(); | |
| 570 if (c0_ == '&') return Select(Token::AND); | |
| 571 if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND); | |
| 572 return Token::BIT_AND; | |
| 573 | |
| 574 case '|': | |
| 575 // | || |= | |
| 576 Advance(); | |
| 577 if (c0_ == '|') return Select(Token::OR); | |
| 578 if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR); | |
| 579 return Token::BIT_OR; | |
| 580 | |
| 581 case '^': | |
| 582 // ^ ^= | |
| 583 return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); | |
| 584 | |
| 585 case '.': | |
| 586 // . Number | |
| 587 Advance(); | |
| 588 if (IsDecimalDigit(c0_)) return ScanNumber(true); | |
| 589 return Token::PERIOD; | |
| 590 | |
| 591 case ':': | |
| 592 return Select(Token::COLON); | |
| 593 | |
| 594 case ';': | |
| 595 return Select(Token::SEMICOLON); | |
| 596 | |
| 597 case ',': | |
| 598 return Select(Token::COMMA); | |
| 599 | |
| 600 case '(': | |
| 601 return Select(Token::LPAREN); | |
| 602 | |
| 603 case ')': | |
| 604 return Select(Token::RPAREN); | |
| 605 | |
| 606 case '[': | |
| 607 return Select(Token::LBRACK); | |
| 608 | |
| 609 case ']': | |
| 610 return Select(Token::RBRACK); | |
| 611 | |
| 612 case '{': | |
| 613 return Select(Token::LBRACE); | |
| 614 | |
| 615 case '}': | |
| 616 return Select(Token::RBRACE); | |
| 617 | |
| 618 case '?': | |
| 619 return Select(Token::CONDITIONAL); | |
| 620 | |
| 621 case '~': | |
| 622 return Select(Token::BIT_NOT); | |
| 623 | |
| 624 default: | |
| 625 if (kIsIdentifierStart.get(c0_)) | |
| 626 return ScanIdentifier(); | |
| 627 if (IsDecimalDigit(c0_)) | |
| 628 return ScanNumber(false); | |
| 629 if (c0_ < 0) | |
| 630 return Token::EOS; | |
| 631 return Select(Token::ILLEGAL); | |
| 632 } | |
| 633 | |
| 634 UNREACHABLE(); | |
| 635 return Token::ILLEGAL; | |
| 636 } | |
| 637 | |
| 638 | |
| 639 // Returns true if any decimal digits were scanned, returns false otherwise. | 719 // Returns true if any decimal digits were scanned, returns false otherwise. |
| 640 void Scanner::ScanDecimalDigits() { | 720 void Scanner::ScanDecimalDigits() { |
| 641 while (IsDecimalDigit(c0_)) | 721 while (IsDecimalDigit(c0_)) |
| 642 AddCharAdvance(); | 722 AddCharAdvance(); |
| 643 } | 723 } |
| 644 | 724 |
| 645 | 725 |
| 646 Token::Value Scanner::ScanNumber(bool seen_period) { | 726 Token::Value Scanner::ScanNumber(bool seen_period) { |
| 647 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 727 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 648 | 728 |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 727 uc32 c = ScanHexEscape('u', 4); | 807 uc32 c = ScanHexEscape('u', 4); |
| 728 // We do not allow a unicode escape sequence to start another | 808 // We do not allow a unicode escape sequence to start another |
| 729 // unicode escape sequence. | 809 // unicode escape sequence. |
| 730 if (c == '\\') return unibrow::Utf8::kBadChar; | 810 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 731 return c; | 811 return c; |
| 732 } | 812 } |
| 733 | 813 |
| 734 | 814 |
| 735 Token::Value Scanner::ScanIdentifier() { | 815 Token::Value Scanner::ScanIdentifier() { |
| 736 ASSERT(kIsIdentifierStart.get(c0_)); | 816 ASSERT(kIsIdentifierStart.get(c0_)); |
| 737 | |
| 738 bool has_escapes = false; | 817 bool has_escapes = false; |
| 739 | 818 |
| 740 StartLiteral(); | 819 StartLiteral(); |
| 741 // Scan identifier start character. | 820 // Scan identifier start character. |
| 742 if (c0_ == '\\') { | 821 if (c0_ == '\\') { |
| 743 has_escapes = true; | 822 has_escapes = true; |
| 744 uc32 c = ScanIdentifierUnicodeEscape(); | 823 uc32 c = ScanIdentifierUnicodeEscape(); |
| 745 // Only allow legal identifier start characters. | 824 // Only allow legal identifier start characters. |
| 746 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 825 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
| 747 AddChar(c); | 826 AddChar(c); |
| 748 } else { | 827 } else { |
| 749 AddCharAdvance(); | 828 AddChar(c0_); |
| 829 Advance(); |
| 750 } | 830 } |
| 831 |
| 751 // Scan the rest of the identifier characters. | 832 // Scan the rest of the identifier characters. |
| 752 while (kIsIdentifierPart.get(c0_)) { | 833 while (kIsIdentifierPart.get(c0_)) { |
| 753 if (c0_ == '\\') { | 834 if (c0_ == '\\') { |
| 754 has_escapes = true; | 835 has_escapes = true; |
| 755 uc32 c = ScanIdentifierUnicodeEscape(); | 836 uc32 c = ScanIdentifierUnicodeEscape(); |
| 756 // Only allow legal identifier part characters. | 837 // Only allow legal identifier part characters. |
| 757 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 838 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
| 758 AddChar(c); | 839 AddChar(c); |
| 759 } else { | 840 } else { |
| 760 AddCharAdvance(); | 841 AddChar(c0_); |
| 842 Advance(); |
| 761 } | 843 } |
| 762 } | 844 } |
| 763 TerminateLiteral(); | 845 TerminateLiteral(); |
| 764 | 846 |
| 765 // We don't have any 1-letter keywords (this is probably a common case). | 847 // We don't have any 1-letter keywords (this is probably a common case). |
| 766 if ((next_.literal_end - next_.literal_pos) == 1) | 848 if ((next_.literal_end - next_.literal_pos) == 1) { |
| 767 return Token::IDENTIFIER; | 849 return Token::IDENTIFIER; |
| 850 } |
| 768 | 851 |
| 769 // If the identifier contains unicode escapes, it must not be | 852 // If the identifier contains unicode escapes, it must not be |
| 770 // resolved to a keyword. | 853 // resolved to a keyword. |
| 771 if (has_escapes) | 854 if (has_escapes) { |
| 772 return Token::IDENTIFIER; | 855 return Token::IDENTIFIER; |
| 856 } |
| 773 | 857 |
| 774 return Token::Lookup(&literals_.data()[next_.literal_pos]); | 858 return Token::Lookup(&literals_.data()[next_.literal_pos]); |
| 775 } | 859 } |
| 776 | 860 |
| 777 | 861 |
| 778 | 862 |
| 779 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 863 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
| 780 // Checks whether the buffer contains an identifier (no escape). | 864 // Checks whether the buffer contains an identifier (no escape). |
| 781 if (!buffer->has_more()) return false; | 865 if (!buffer->has_more()) return false; |
| 782 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | 866 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 841 } | 925 } |
| 842 AddCharAdvance(); | 926 AddCharAdvance(); |
| 843 } | 927 } |
| 844 TerminateLiteral(); | 928 TerminateLiteral(); |
| 845 | 929 |
| 846 next_.location.end_pos = source_pos() - 1; | 930 next_.location.end_pos = source_pos() - 1; |
| 847 return true; | 931 return true; |
| 848 } | 932 } |
| 849 | 933 |
| 850 } } // namespace v8::internal | 934 } } // namespace v8::internal |
| OLD | NEW |