OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 30 matching lines...) Expand all Loading... |
41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; |
42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; |
43 | 43 |
44 | 44 |
45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; |
46 | 46 |
47 | 47 |
48 // ---------------------------------------------------------------------------- | 48 // ---------------------------------------------------------------------------- |
49 // UTF8Buffer | 49 // UTF8Buffer |
50 | 50 |
51 UTF8Buffer::UTF8Buffer() : data_(NULL) { | 51 UTF8Buffer::UTF8Buffer() { |
52 Initialize(NULL, 0); | 52 static const int kInitialCapacity = 1 * KB; |
| 53 data_ = NewArray<char>(kInitialCapacity); |
| 54 limit_ = ComputeLimit(data_, kInitialCapacity); |
| 55 Reset(); |
| 56 ASSERT(Capacity() == kInitialCapacity && pos() == 0); |
53 } | 57 } |
54 | 58 |
55 | 59 |
56 UTF8Buffer::~UTF8Buffer() { | 60 UTF8Buffer::~UTF8Buffer() { |
57 DeleteArray(data_); | 61 DeleteArray(data_); |
58 } | 62 } |
59 | 63 |
60 | 64 |
61 void UTF8Buffer::Initialize(char* src, int length) { | 65 void UTF8Buffer::AddCharSlow(uc32 c) { |
62 DeleteArray(data_); | 66 static const int kCapacityGrowthLimit = 1 * MB; |
63 data_ = src; | 67 if (cursor_ > limit_) { |
64 size_ = length; | 68 int old_capacity = Capacity(); |
65 Reset(); | 69 int old_position = pos(); |
| 70 int new_capacity = |
| 71 Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); |
| 72 char* new_data = NewArray<char>(new_capacity); |
| 73 memcpy(new_data, data_, old_position); |
| 74 DeleteArray(data_); |
| 75 data_ = new_data; |
| 76 cursor_ = new_data + old_position; |
| 77 limit_ = ComputeLimit(new_data, new_capacity); |
| 78 ASSERT(Capacity() == new_capacity && pos() == old_position); |
| 79 } |
| 80 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 81 *cursor_++ = c; // Common case: 7-bit ASCII. |
| 82 } else { |
| 83 cursor_ += unibrow::Utf8::Encode(cursor_, c); |
| 84 } |
| 85 ASSERT(pos() <= Capacity()); |
66 } | 86 } |
67 | 87 |
68 | 88 |
69 void UTF8Buffer::AddChar(uc32 c) { | |
70 const int min_size = 1024; | |
71 if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) { | |
72 int new_size = size_ * 2; | |
73 if (new_size < min_size) { | |
74 new_size = min_size; | |
75 } | |
76 char* new_data = NewArray<char>(new_size); | |
77 memcpy(new_data, data_, pos_); | |
78 DeleteArray(data_); | |
79 data_ = new_data; | |
80 size_ = new_size; | |
81 } | |
82 if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) { | |
83 data_[pos_++] = c; // common case: 7bit ASCII | |
84 } else { | |
85 pos_ += unibrow::Utf8::Encode(&data_[pos_], c); | |
86 } | |
87 ASSERT(pos_ <= size_); | |
88 } | |
89 | |
90 | |
91 // ---------------------------------------------------------------------------- | 89 // ---------------------------------------------------------------------------- |
92 // UTF16Buffer | 90 // UTF16Buffer |
93 | 91 |
94 | 92 |
95 UTF16Buffer::UTF16Buffer() | 93 UTF16Buffer::UTF16Buffer() |
96 : pos_(0), | 94 : pos_(0), |
97 pushback_buffer_(0), | 95 pushback_buffer_(0), |
98 last_(0), | 96 last_(0), |
99 stream_(NULL) { } | 97 stream_(NULL) { } |
100 | 98 |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
165 source_.Initialize(source, stream); | 163 source_.Initialize(source, stream); |
166 position_ = position; | 164 position_ = position; |
167 | 165 |
168 // Reset literals buffer | 166 // Reset literals buffer |
169 literals_.Reset(); | 167 literals_.Reset(); |
170 | 168 |
171 // Set c0_ (one character ahead) | 169 // Set c0_ (one character ahead) |
172 ASSERT(kCharacterLookaheadBufferSize == 1); | 170 ASSERT(kCharacterLookaheadBufferSize == 1); |
173 Advance(); | 171 Advance(); |
174 | 172 |
175 // Skip initial whitespace (allowing HTML comment ends) and scan | 173 // Skip initial whitespace allowing HTML comment ends just like |
176 // first token. | 174 // after a newline and scan first token. |
177 SkipWhiteSpace(true); | 175 has_line_terminator_before_next_ = true; |
| 176 SkipWhiteSpace(); |
178 Scan(); | 177 Scan(); |
179 } | 178 } |
180 | 179 |
181 | 180 |
182 Handle<String> Scanner::SubString(int start, int end) { | 181 Handle<String> Scanner::SubString(int start, int end) { |
183 return source_.SubString(start - position_, end - position_); | 182 return source_.SubString(start - position_, end - position_); |
184 } | 183 } |
185 | 184 |
186 | 185 |
187 Token::Value Scanner::Next() { | 186 Token::Value Scanner::Next() { |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
239 // Unicode character; this implies that in a Unicode context the | 238 // Unicode character; this implies that in a Unicode context the |
240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 239 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
241 // character expressed in little-endian byte order (since it could | 240 // character expressed in little-endian byte order (since it could |
242 // not be a U+FFFE character expressed in big-endian byte | 241 // not be a U+FFFE character expressed in big-endian byte |
243 // order). Nevertheless, we check for it to be compatible with | 242 // order). Nevertheless, we check for it to be compatible with |
244 // Spidermonkey. | 243 // Spidermonkey. |
245 return c == 0xFEFF || c == 0xFFFE; | 244 return c == 0xFEFF || c == 0xFFFE; |
246 } | 245 } |
247 | 246 |
248 | 247 |
249 void Scanner::SkipWhiteSpace(bool initial) { | 248 bool Scanner::SkipWhiteSpace() { |
250 has_line_terminator_before_next_ = initial; | 249 int start_position = source_pos(); |
251 | 250 |
252 while (true) { | 251 while (true) { |
253 // We treat byte-order marks (BOMs) as whitespace for better | 252 // We treat byte-order marks (BOMs) as whitespace for better |
254 // compatibility with Spidermonkey and other JavaScript engines. | 253 // compatibility with Spidermonkey and other JavaScript engines. |
255 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 254 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
256 // IsWhiteSpace() includes line terminators! | 255 // IsWhiteSpace() includes line terminators! |
257 if (kIsLineTerminator.get(c0_)) | 256 if (kIsLineTerminator.get(c0_)) { |
258 // Ignore line terminators, but remember them. This is necessary | 257 // Ignore line terminators, but remember them. This is necessary |
259 // for automatic semicolon insertion. | 258 // for automatic semicolon insertion. |
260 has_line_terminator_before_next_ = true; | 259 has_line_terminator_before_next_ = true; |
| 260 } |
261 Advance(); | 261 Advance(); |
262 } | 262 } |
263 | 263 |
264 // If there is an HTML comment end '-->' at the beginning of a | 264 // If there is an HTML comment end '-->' at the beginning of a |
265 // line (with only whitespace in front of it), we treat the rest | 265 // line (with only whitespace in front of it), we treat the rest |
266 // of the line as a comment. This is in line with the way | 266 // of the line as a comment. This is in line with the way |
267 // SpiderMonkey handles it. | 267 // SpiderMonkey handles it. |
268 if (c0_ == '-' && has_line_terminator_before_next_) { | 268 if (c0_ == '-' && has_line_terminator_before_next_) { |
269 Advance(); | 269 Advance(); |
270 if (c0_ == '-') { | 270 if (c0_ == '-') { |
271 Advance(); | 271 Advance(); |
272 if (c0_ == '>') { | 272 if (c0_ == '>') { |
273 // Treat the rest of the line as a comment. | 273 // Treat the rest of the line as a comment. |
274 SkipSingleLineComment(); | 274 SkipSingleLineComment(); |
275 // Continue skipping white space after the comment. | 275 // Continue skipping white space after the comment. |
276 continue; | 276 continue; |
277 } | 277 } |
278 PushBack('-'); // undo Advance() | 278 PushBack('-'); // undo Advance() |
279 } | 279 } |
280 PushBack('-'); // undo Advance() | 280 PushBack('-'); // undo Advance() |
281 } | 281 } |
282 return; | 282 // Return whether or not we skipped any characters. |
| 283 return source_pos() != start_position; |
283 } | 284 } |
284 } | 285 } |
285 | 286 |
286 | 287 |
287 Token::Value Scanner::SkipSingleLineComment() { | 288 Token::Value Scanner::SkipSingleLineComment() { |
288 Advance(); | 289 Advance(); |
289 | 290 |
290 // The line terminator at the end of the line is not considered | 291 // The line terminator at the end of the line is not considered |
291 // to be part of the single-line comment; it is recognized | 292 // to be part of the single-line comment; it is recognized |
292 // separately by the lexical grammar and becomes part of the | 293 // separately by the lexical grammar and becomes part of the |
293 // stream of input elements for the syntactic grammar (see | 294 // stream of input elements for the syntactic grammar (see |
294 // ECMA-262, section 7.4, page 12). | 295 // ECMA-262, section 7.4, page 12). |
295 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 296 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
296 Advance(); | 297 Advance(); |
297 } | 298 } |
298 | 299 |
299 return Token::COMMENT; | 300 return Token::WHITESPACE; |
300 } | 301 } |
301 | 302 |
302 | 303 |
303 Token::Value Scanner::SkipMultiLineComment() { | 304 Token::Value Scanner::SkipMultiLineComment() { |
304 ASSERT(c0_ == '*'); | 305 ASSERT(c0_ == '*'); |
305 Advance(); | 306 Advance(); |
306 | 307 |
307 while (c0_ >= 0) { | 308 while (c0_ >= 0) { |
308 char ch = c0_; | 309 char ch = c0_; |
309 Advance(); | 310 Advance(); |
310 // If we have reached the end of the multi-line comment, we | 311 // If we have reached the end of the multi-line comment, we |
311 // consume the '/' and insert a whitespace. This way all | 312 // consume the '/' and insert a whitespace. This way all |
312 // multi-line comments are treated as whitespace - even the ones | 313 // multi-line comments are treated as whitespace - even the ones |
313 // containing line terminators. This contradicts ECMA-262, section | 314 // containing line terminators. This contradicts ECMA-262, section |
314 // 7.4, page 12, that says that multi-line comments containing | 315 // 7.4, page 12, that says that multi-line comments containing |
315 // line terminators should be treated as a line terminator, but it | 316 // line terminators should be treated as a line terminator, but it |
316 // matches the behaviour of SpiderMonkey and KJS. | 317 // matches the behaviour of SpiderMonkey and KJS. |
317 if (ch == '*' && c0_ == '/') { | 318 if (ch == '*' && c0_ == '/') { |
318 c0_ = ' '; | 319 c0_ = ' '; |
319 return Token::COMMENT; | 320 return Token::WHITESPACE; |
320 } | 321 } |
321 } | 322 } |
322 | 323 |
323 // Unterminated multi-line comment. | 324 // Unterminated multi-line comment. |
324 return Token::ILLEGAL; | 325 return Token::ILLEGAL; |
325 } | 326 } |
326 | 327 |
327 | 328 |
328 Token::Value Scanner::ScanHtmlComment() { | 329 Token::Value Scanner::ScanHtmlComment() { |
329 // Check for <!-- comments. | 330 // Check for <!-- comments. |
330 ASSERT(c0_ == '!'); | 331 ASSERT(c0_ == '!'); |
331 Advance(); | 332 Advance(); |
332 if (c0_ == '-') { | 333 if (c0_ == '-') { |
333 Advance(); | 334 Advance(); |
334 if (c0_ == '-') return SkipSingleLineComment(); | 335 if (c0_ == '-') return SkipSingleLineComment(); |
335 PushBack('-'); // undo Advance() | 336 PushBack('-'); // undo Advance() |
336 } | 337 } |
337 PushBack('!'); // undo Advance() | 338 PushBack('!'); // undo Advance() |
338 ASSERT(c0_ == '!'); | 339 ASSERT(c0_ == '!'); |
339 return Token::LT; | 340 return Token::LT; |
340 } | 341 } |
341 | 342 |
342 | 343 |
343 void Scanner::Scan() { | 344 void Scanner::Scan() { |
344 Token::Value token; | 345 Token::Value token; |
345 bool has_line_terminator = false; | 346 has_line_terminator_before_next_ = false; |
346 do { | 347 do { |
347 SkipWhiteSpace(has_line_terminator); | |
348 | |
349 // Remember the line terminator in previous loop | |
350 has_line_terminator = has_line_terminator_before_next(); | |
351 | |
352 // Remember the position of the next token | 348 // Remember the position of the next token |
353 next_.location.beg_pos = source_pos(); | 349 next_.location.beg_pos = source_pos(); |
354 | 350 |
355 token = ScanToken(); | 351 switch (c0_) { |
356 } while (token == Token::COMMENT); | 352 case ' ': |
| 353 case '\t': |
| 354 Advance(); |
| 355 token = Token::WHITESPACE; |
| 356 break; |
| 357 |
| 358 case '\n': |
| 359 Advance(); |
| 360 has_line_terminator_before_next_ = true; |
| 361 token = Token::WHITESPACE; |
| 362 break; |
| 363 |
| 364 case '"': case '\'': |
| 365 token = ScanString(); |
| 366 break; |
| 367 |
| 368 case '<': |
| 369 // < <= << <<= <!-- |
| 370 Advance(); |
| 371 if (c0_ == '=') { |
| 372 token = Select(Token::LTE); |
| 373 } else if (c0_ == '<') { |
| 374 token = Select('=', Token::ASSIGN_SHL, Token::SHL); |
| 375 } else if (c0_ == '!') { |
| 376 token = ScanHtmlComment(); |
| 377 } else { |
| 378 token = Token::LT; |
| 379 } |
| 380 break; |
| 381 |
| 382 case '>': |
| 383 // > >= >> >>= >>> >>>= |
| 384 Advance(); |
| 385 if (c0_ == '=') { |
| 386 token = Select(Token::GTE); |
| 387 } else if (c0_ == '>') { |
| 388 // >> >>= >>> >>>= |
| 389 Advance(); |
| 390 if (c0_ == '=') { |
| 391 token = Select(Token::ASSIGN_SAR); |
| 392 } else if (c0_ == '>') { |
| 393 token = Select('=', Token::ASSIGN_SHR, Token::SHR); |
| 394 } else { |
| 395 token = Token::SAR; |
| 396 } |
| 397 } else { |
| 398 token = Token::GT; |
| 399 } |
| 400 break; |
| 401 |
| 402 case '=': |
| 403 // = == === |
| 404 Advance(); |
| 405 if (c0_ == '=') { |
| 406 token = Select('=', Token::EQ_STRICT, Token::EQ); |
| 407 } else { |
| 408 token = Token::ASSIGN; |
| 409 } |
| 410 break; |
| 411 |
| 412 case '!': |
| 413 // ! != !== |
| 414 Advance(); |
| 415 if (c0_ == '=') { |
| 416 token = Select('=', Token::NE_STRICT, Token::NE); |
| 417 } else { |
| 418 token = Token::NOT; |
| 419 } |
| 420 break; |
| 421 |
| 422 case '+': |
| 423 // + ++ += |
| 424 Advance(); |
| 425 if (c0_ == '+') { |
| 426 token = Select(Token::INC); |
| 427 } else if (c0_ == '=') { |
| 428 token = Select(Token::ASSIGN_ADD); |
| 429 } else { |
| 430 token = Token::ADD; |
| 431 } |
| 432 break; |
| 433 |
| 434 case '-': |
| 435 // - -- --> -= |
| 436 Advance(); |
| 437 if (c0_ == '-') { |
| 438 Advance(); |
| 439 if (c0_ == '>' && has_line_terminator_before_next_) { |
| 440 // For compatibility with SpiderMonkey, we skip lines that |
| 441 // start with an HTML comment end '-->'. |
| 442 token = SkipSingleLineComment(); |
| 443 } else { |
| 444 token = Token::DEC; |
| 445 } |
| 446 } else if (c0_ == '=') { |
| 447 token = Select(Token::ASSIGN_SUB); |
| 448 } else { |
| 449 token = Token::SUB; |
| 450 } |
| 451 break; |
| 452 |
| 453 case '*': |
| 454 // * *= |
| 455 token = Select('=', Token::ASSIGN_MUL, Token::MUL); |
| 456 break; |
| 457 |
| 458 case '%': |
| 459 // % %= |
| 460 token = Select('=', Token::ASSIGN_MOD, Token::MOD); |
| 461 break; |
| 462 |
| 463 case '/': |
| 464 // / // /* /= |
| 465 Advance(); |
| 466 if (c0_ == '/') { |
| 467 token = SkipSingleLineComment(); |
| 468 } else if (c0_ == '*') { |
| 469 token = SkipMultiLineComment(); |
| 470 } else if (c0_ == '=') { |
| 471 token = Select(Token::ASSIGN_DIV); |
| 472 } else { |
| 473 token = Token::DIV; |
| 474 } |
| 475 break; |
| 476 |
| 477 case '&': |
| 478 // & && &= |
| 479 Advance(); |
| 480 if (c0_ == '&') { |
| 481 token = Select(Token::AND); |
| 482 } else if (c0_ == '=') { |
| 483 token = Select(Token::ASSIGN_BIT_AND); |
| 484 } else { |
| 485 token = Token::BIT_AND; |
| 486 } |
| 487 break; |
| 488 |
| 489 case '|': |
| 490 // | || |= |
| 491 Advance(); |
| 492 if (c0_ == '|') { |
| 493 token = Select(Token::OR); |
| 494 } else if (c0_ == '=') { |
| 495 token = Select(Token::ASSIGN_BIT_OR); |
| 496 } else { |
| 497 token = Token::BIT_OR; |
| 498 } |
| 499 break; |
| 500 |
| 501 case '^': |
| 502 // ^ ^= |
| 503 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); |
| 504 break; |
| 505 |
| 506 case '.': |
| 507 // . Number |
| 508 Advance(); |
| 509 if (IsDecimalDigit(c0_)) { |
| 510 token = ScanNumber(true); |
| 511 } else { |
| 512 token = Token::PERIOD; |
| 513 } |
| 514 break; |
| 515 |
| 516 case ':': |
| 517 token = Select(Token::COLON); |
| 518 break; |
| 519 |
| 520 case ';': |
| 521 token = Select(Token::SEMICOLON); |
| 522 break; |
| 523 |
| 524 case ',': |
| 525 token = Select(Token::COMMA); |
| 526 break; |
| 527 |
| 528 case '(': |
| 529 token = Select(Token::LPAREN); |
| 530 break; |
| 531 |
| 532 case ')': |
| 533 token = Select(Token::RPAREN); |
| 534 break; |
| 535 |
| 536 case '[': |
| 537 token = Select(Token::LBRACK); |
| 538 break; |
| 539 |
| 540 case ']': |
| 541 token = Select(Token::RBRACK); |
| 542 break; |
| 543 |
| 544 case '{': |
| 545 token = Select(Token::LBRACE); |
| 546 break; |
| 547 |
| 548 case '}': |
| 549 token = Select(Token::RBRACE); |
| 550 break; |
| 551 |
| 552 case '?': |
| 553 token = Select(Token::CONDITIONAL); |
| 554 break; |
| 555 |
| 556 case '~': |
| 557 token = Select(Token::BIT_NOT); |
| 558 break; |
| 559 |
| 560 default: |
| 561 if (kIsIdentifierStart.get(c0_)) { |
| 562 token = ScanIdentifier(); |
| 563 } else if (IsDecimalDigit(c0_)) { |
| 564 token = ScanNumber(false); |
| 565 } else if (SkipWhiteSpace()) { |
| 566 token = Token::WHITESPACE; |
| 567 } else if (c0_ < 0) { |
| 568 token = Token::EOS; |
| 569 } else { |
| 570 token = Select(Token::ILLEGAL); |
| 571 } |
| 572 break; |
| 573 } |
| 574 |
| 575 // Continue scanning for tokens as long as we're just skipping |
| 576 // whitespace. |
| 577 } while (token == Token::WHITESPACE); |
357 | 578 |
358 next_.location.end_pos = source_pos(); | 579 next_.location.end_pos = source_pos(); |
359 next_.token = token; | 580 next_.token = token; |
360 } | 581 } |
361 | 582 |
362 | 583 |
363 void Scanner::SeekForward(int pos) { | 584 void Scanner::SeekForward(int pos) { |
364 source_.SeekForward(pos - 1); | 585 source_.SeekForward(pos - 1); |
365 Advance(); | 586 Advance(); |
366 Scan(); | 587 Scan(); |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
488 Advance(); | 709 Advance(); |
489 if (c0_ == next) { | 710 if (c0_ == next) { |
490 Advance(); | 711 Advance(); |
491 return then; | 712 return then; |
492 } else { | 713 } else { |
493 return else_; | 714 return else_; |
494 } | 715 } |
495 } | 716 } |
496 | 717 |
497 | 718 |
498 Token::Value Scanner::ScanToken() { | |
499 switch (c0_) { | |
500 // strings | |
501 case '"': case '\'': | |
502 return ScanString(); | |
503 | |
504 case '<': | |
505 // < <= << <<= <!-- | |
506 Advance(); | |
507 if (c0_ == '=') return Select(Token::LTE); | |
508 if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL); | |
509 if (c0_ == '!') return ScanHtmlComment(); | |
510 return Token::LT; | |
511 | |
512 case '>': | |
513 // > >= >> >>= >>> >>>= | |
514 Advance(); | |
515 if (c0_ == '=') return Select(Token::GTE); | |
516 if (c0_ == '>') { | |
517 // >> >>= >>> >>>= | |
518 Advance(); | |
519 if (c0_ == '=') return Select(Token::ASSIGN_SAR); | |
520 if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR); | |
521 return Token::SAR; | |
522 } | |
523 return Token::GT; | |
524 | |
525 case '=': | |
526 // = == === | |
527 Advance(); | |
528 if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ); | |
529 return Token::ASSIGN; | |
530 | |
531 case '!': | |
532 // ! != !== | |
533 Advance(); | |
534 if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE); | |
535 return Token::NOT; | |
536 | |
537 case '+': | |
538 // + ++ += | |
539 Advance(); | |
540 if (c0_ == '+') return Select(Token::INC); | |
541 if (c0_ == '=') return Select(Token::ASSIGN_ADD); | |
542 return Token::ADD; | |
543 | |
544 case '-': | |
545 // - -- -= | |
546 Advance(); | |
547 if (c0_ == '-') return Select(Token::DEC); | |
548 if (c0_ == '=') return Select(Token::ASSIGN_SUB); | |
549 return Token::SUB; | |
550 | |
551 case '*': | |
552 // * *= | |
553 return Select('=', Token::ASSIGN_MUL, Token::MUL); | |
554 | |
555 case '%': | |
556 // % %= | |
557 return Select('=', Token::ASSIGN_MOD, Token::MOD); | |
558 | |
559 case '/': | |
560 // / // /* /= | |
561 Advance(); | |
562 if (c0_ == '/') return SkipSingleLineComment(); | |
563 if (c0_ == '*') return SkipMultiLineComment(); | |
564 if (c0_ == '=') return Select(Token::ASSIGN_DIV); | |
565 return Token::DIV; | |
566 | |
567 case '&': | |
568 // & && &= | |
569 Advance(); | |
570 if (c0_ == '&') return Select(Token::AND); | |
571 if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND); | |
572 return Token::BIT_AND; | |
573 | |
574 case '|': | |
575 // | || |= | |
576 Advance(); | |
577 if (c0_ == '|') return Select(Token::OR); | |
578 if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR); | |
579 return Token::BIT_OR; | |
580 | |
581 case '^': | |
582 // ^ ^= | |
583 return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); | |
584 | |
585 case '.': | |
586 // . Number | |
587 Advance(); | |
588 if (IsDecimalDigit(c0_)) return ScanNumber(true); | |
589 return Token::PERIOD; | |
590 | |
591 case ':': | |
592 return Select(Token::COLON); | |
593 | |
594 case ';': | |
595 return Select(Token::SEMICOLON); | |
596 | |
597 case ',': | |
598 return Select(Token::COMMA); | |
599 | |
600 case '(': | |
601 return Select(Token::LPAREN); | |
602 | |
603 case ')': | |
604 return Select(Token::RPAREN); | |
605 | |
606 case '[': | |
607 return Select(Token::LBRACK); | |
608 | |
609 case ']': | |
610 return Select(Token::RBRACK); | |
611 | |
612 case '{': | |
613 return Select(Token::LBRACE); | |
614 | |
615 case '}': | |
616 return Select(Token::RBRACE); | |
617 | |
618 case '?': | |
619 return Select(Token::CONDITIONAL); | |
620 | |
621 case '~': | |
622 return Select(Token::BIT_NOT); | |
623 | |
624 default: | |
625 if (kIsIdentifierStart.get(c0_)) | |
626 return ScanIdentifier(); | |
627 if (IsDecimalDigit(c0_)) | |
628 return ScanNumber(false); | |
629 if (c0_ < 0) | |
630 return Token::EOS; | |
631 return Select(Token::ILLEGAL); | |
632 } | |
633 | |
634 UNREACHABLE(); | |
635 return Token::ILLEGAL; | |
636 } | |
637 | |
638 | |
639 // Returns true if any decimal digits were scanned, returns false otherwise. | 719 // Returns true if any decimal digits were scanned, returns false otherwise. |
640 void Scanner::ScanDecimalDigits() { | 720 void Scanner::ScanDecimalDigits() { |
641 while (IsDecimalDigit(c0_)) | 721 while (IsDecimalDigit(c0_)) |
642 AddCharAdvance(); | 722 AddCharAdvance(); |
643 } | 723 } |
644 | 724 |
645 | 725 |
646 Token::Value Scanner::ScanNumber(bool seen_period) { | 726 Token::Value Scanner::ScanNumber(bool seen_period) { |
647 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 727 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
648 | 728 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
727 uc32 c = ScanHexEscape('u', 4); | 807 uc32 c = ScanHexEscape('u', 4); |
728 // We do not allow a unicode escape sequence to start another | 808 // We do not allow a unicode escape sequence to start another |
729 // unicode escape sequence. | 809 // unicode escape sequence. |
730 if (c == '\\') return unibrow::Utf8::kBadChar; | 810 if (c == '\\') return unibrow::Utf8::kBadChar; |
731 return c; | 811 return c; |
732 } | 812 } |
733 | 813 |
734 | 814 |
735 Token::Value Scanner::ScanIdentifier() { | 815 Token::Value Scanner::ScanIdentifier() { |
736 ASSERT(kIsIdentifierStart.get(c0_)); | 816 ASSERT(kIsIdentifierStart.get(c0_)); |
737 | |
738 bool has_escapes = false; | 817 bool has_escapes = false; |
739 | 818 |
740 StartLiteral(); | 819 StartLiteral(); |
741 // Scan identifier start character. | 820 // Scan identifier start character. |
742 if (c0_ == '\\') { | 821 if (c0_ == '\\') { |
743 has_escapes = true; | 822 has_escapes = true; |
744 uc32 c = ScanIdentifierUnicodeEscape(); | 823 uc32 c = ScanIdentifierUnicodeEscape(); |
745 // Only allow legal identifier start characters. | 824 // Only allow legal identifier start characters. |
746 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 825 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
747 AddChar(c); | 826 AddChar(c); |
748 } else { | 827 } else { |
749 AddCharAdvance(); | 828 AddChar(c0_); |
| 829 Advance(); |
750 } | 830 } |
| 831 |
751 // Scan the rest of the identifier characters. | 832 // Scan the rest of the identifier characters. |
752 while (kIsIdentifierPart.get(c0_)) { | 833 while (kIsIdentifierPart.get(c0_)) { |
753 if (c0_ == '\\') { | 834 if (c0_ == '\\') { |
754 has_escapes = true; | 835 has_escapes = true; |
755 uc32 c = ScanIdentifierUnicodeEscape(); | 836 uc32 c = ScanIdentifierUnicodeEscape(); |
756 // Only allow legal identifier part characters. | 837 // Only allow legal identifier part characters. |
757 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 838 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
758 AddChar(c); | 839 AddChar(c); |
759 } else { | 840 } else { |
760 AddCharAdvance(); | 841 AddChar(c0_); |
| 842 Advance(); |
761 } | 843 } |
762 } | 844 } |
763 TerminateLiteral(); | 845 TerminateLiteral(); |
764 | 846 |
765 // We don't have any 1-letter keywords (this is probably a common case). | 847 // We don't have any 1-letter keywords (this is probably a common case). |
766 if ((next_.literal_end - next_.literal_pos) == 1) | 848 if ((next_.literal_end - next_.literal_pos) == 1) { |
767 return Token::IDENTIFIER; | 849 return Token::IDENTIFIER; |
| 850 } |
768 | 851 |
769 // If the identifier contains unicode escapes, it must not be | 852 // If the identifier contains unicode escapes, it must not be |
770 // resolved to a keyword. | 853 // resolved to a keyword. |
771 if (has_escapes) | 854 if (has_escapes) { |
772 return Token::IDENTIFIER; | 855 return Token::IDENTIFIER; |
| 856 } |
773 | 857 |
774 return Token::Lookup(&literals_.data()[next_.literal_pos]); | 858 return Token::Lookup(&literals_.data()[next_.literal_pos]); |
775 } | 859 } |
776 | 860 |
777 | 861 |
778 | 862 |
779 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 863 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
780 // Checks whether the buffer contains an identifier (no escape). | 864 // Checks whether the buffer contains an identifier (no escape). |
781 if (!buffer->has_more()) return false; | 865 if (!buffer->has_more()) return false; |
782 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | 866 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
841 } | 925 } |
842 AddCharAdvance(); | 926 AddCharAdvance(); |
843 } | 927 } |
844 TerminateLiteral(); | 928 TerminateLiteral(); |
845 | 929 |
846 next_.location.end_pos = source_pos() - 1; | 930 next_.location.end_pos = source_pos() - 1; |
847 return true; | 931 return true; |
848 } | 932 } |
849 | 933 |
850 } } // namespace v8::internal | 934 } } // namespace v8::internal |
OLD | NEW |