Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: src/scanner.cc

Issue 113336: Optimize the lexical scanner by selective inlining, and... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 30 matching lines...) Expand all
41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; 41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; 42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
43 43
44 44
45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
46 46
47 47
48 // ---------------------------------------------------------------------------- 48 // ----------------------------------------------------------------------------
49 // UTF8Buffer 49 // UTF8Buffer
50 50
51 UTF8Buffer::UTF8Buffer() : data_(NULL) { 51 UTF8Buffer::UTF8Buffer() {
52 Initialize(NULL, 0); 52 static const int kInitialCapacity = 1 * KB;
53 data_ = NewArray<char>(kInitialCapacity);
54 limit_ = ComputeLimit(data_, kInitialCapacity);
55 Reset();
56 ASSERT(Capacity() == kInitialCapacity && pos() == 0);
53 } 57 }
54 58
55 59
56 UTF8Buffer::~UTF8Buffer() { 60 UTF8Buffer::~UTF8Buffer() {
57 DeleteArray(data_); 61 DeleteArray(data_);
58 } 62 }
59 63
60 64
61 void UTF8Buffer::Initialize(char* src, int length) { 65 void UTF8Buffer::AddCharSlow(uc32 c) {
62 DeleteArray(data_); 66 static const int kCapacityGrowthLimit = 1 * MB;
63 data_ = src; 67 if (cursor_ > limit_) {
64 size_ = length; 68 int old_capacity = Capacity();
65 Reset(); 69 int old_position = pos();
70 int new_capacity =
71 Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit);
72 char* new_data = NewArray<char>(new_capacity);
73 memcpy(new_data, data_, old_position);
74 DeleteArray(data_);
75 data_ = new_data;
76 cursor_ = new_data + old_position;
77 limit_ = ComputeLimit(new_data, new_capacity);
78 ASSERT(Capacity() == new_capacity && pos() == old_position);
79 }
80 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
81 *cursor_++ = c; // Common case: 7-bit ASCII.
82 } else {
83 cursor_ += unibrow::Utf8::Encode(cursor_, c);
84 }
85 ASSERT(pos() <= Capacity());
66 } 86 }
67 87
68 88
69 void UTF8Buffer::AddChar(uc32 c) {
70 const int min_size = 1024;
71 if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) {
72 int new_size = size_ * 2;
73 if (new_size < min_size) {
74 new_size = min_size;
75 }
76 char* new_data = NewArray<char>(new_size);
77 memcpy(new_data, data_, pos_);
78 DeleteArray(data_);
79 data_ = new_data;
80 size_ = new_size;
81 }
82 if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) {
83 data_[pos_++] = c; // common case: 7bit ASCII
84 } else {
85 pos_ += unibrow::Utf8::Encode(&data_[pos_], c);
86 }
87 ASSERT(pos_ <= size_);
88 }
89
90
91 // ---------------------------------------------------------------------------- 89 // ----------------------------------------------------------------------------
92 // UTF16Buffer 90 // UTF16Buffer
93 91
94 92
95 UTF16Buffer::UTF16Buffer() 93 UTF16Buffer::UTF16Buffer()
96 : pos_(0), 94 : pos_(0),
97 pushback_buffer_(0), 95 pushback_buffer_(0),
98 last_(0), 96 last_(0),
99 stream_(NULL) { } 97 stream_(NULL) { }
100 98
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 source_.Initialize(source, stream); 163 source_.Initialize(source, stream);
166 position_ = position; 164 position_ = position;
167 165
168 // Reset literals buffer 166 // Reset literals buffer
169 literals_.Reset(); 167 literals_.Reset();
170 168
171 // Set c0_ (one character ahead) 169 // Set c0_ (one character ahead)
172 ASSERT(kCharacterLookaheadBufferSize == 1); 170 ASSERT(kCharacterLookaheadBufferSize == 1);
173 Advance(); 171 Advance();
174 172
175 // Skip initial whitespace (allowing HTML comment ends) and scan 173 // Skip initial whitespace allowing HTML comment ends just like
176 // first token. 174 // after a newline and scan first token.
177 SkipWhiteSpace(true); 175 has_line_terminator_before_next_ = true;
176 SkipWhiteSpace();
178 Scan(); 177 Scan();
179 } 178 }
180 179
181 180
182 Handle<String> Scanner::SubString(int start, int end) { 181 Handle<String> Scanner::SubString(int start, int end) {
183 return source_.SubString(start - position_, end - position_); 182 return source_.SubString(start - position_, end - position_);
184 } 183 }
185 184
186 185
187 Token::Value Scanner::Next() { 186 Token::Value Scanner::Next() {
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
239 // Unicode character; this implies that in a Unicode context the 238 // Unicode character; this implies that in a Unicode context the
240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 239 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
241 // character expressed in little-endian byte order (since it could 240 // character expressed in little-endian byte order (since it could
242 // not be a U+FFFE character expressed in big-endian byte 241 // not be a U+FFFE character expressed in big-endian byte
243 // order). Nevertheless, we check for it to be compatible with 242 // order). Nevertheless, we check for it to be compatible with
244 // Spidermonkey. 243 // Spidermonkey.
245 return c == 0xFEFF || c == 0xFFFE; 244 return c == 0xFEFF || c == 0xFFFE;
246 } 245 }
247 246
248 247
249 void Scanner::SkipWhiteSpace(bool initial) { 248 bool Scanner::SkipWhiteSpace() {
250 has_line_terminator_before_next_ = initial; 249 int start_position = source_pos();
251 250
252 while (true) { 251 while (true) {
253 // We treat byte-order marks (BOMs) as whitespace for better 252 // We treat byte-order marks (BOMs) as whitespace for better
254 // compatibility with Spidermonkey and other JavaScript engines. 253 // compatibility with Spidermonkey and other JavaScript engines.
255 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 254 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
256 // IsWhiteSpace() includes line terminators! 255 // IsWhiteSpace() includes line terminators!
257 if (kIsLineTerminator.get(c0_)) 256 if (kIsLineTerminator.get(c0_)) {
258 // Ignore line terminators, but remember them. This is necessary 257 // Ignore line terminators, but remember them. This is necessary
259 // for automatic semicolon insertion. 258 // for automatic semicolon insertion.
260 has_line_terminator_before_next_ = true; 259 has_line_terminator_before_next_ = true;
260 }
261 Advance(); 261 Advance();
262 } 262 }
263 263
264 // If there is an HTML comment end '-->' at the beginning of a 264 // If there is an HTML comment end '-->' at the beginning of a
265 // line (with only whitespace in front of it), we treat the rest 265 // line (with only whitespace in front of it), we treat the rest
266 // of the line as a comment. This is in line with the way 266 // of the line as a comment. This is in line with the way
267 // SpiderMonkey handles it. 267 // SpiderMonkey handles it.
268 if (c0_ == '-' && has_line_terminator_before_next_) { 268 if (c0_ == '-' && has_line_terminator_before_next_) {
269 Advance(); 269 Advance();
270 if (c0_ == '-') { 270 if (c0_ == '-') {
271 Advance(); 271 Advance();
272 if (c0_ == '>') { 272 if (c0_ == '>') {
273 // Treat the rest of the line as a comment. 273 // Treat the rest of the line as a comment.
274 SkipSingleLineComment(); 274 SkipSingleLineComment();
275 // Continue skipping white space after the comment. 275 // Continue skipping white space after the comment.
276 continue; 276 continue;
277 } 277 }
278 PushBack('-'); // undo Advance() 278 PushBack('-'); // undo Advance()
279 } 279 }
280 PushBack('-'); // undo Advance() 280 PushBack('-'); // undo Advance()
281 } 281 }
282 return; 282 // Return whether or not we skipped any characters.
283 return source_pos() != start_position;
283 } 284 }
284 } 285 }
285 286
286 287
287 Token::Value Scanner::SkipSingleLineComment() { 288 Token::Value Scanner::SkipSingleLineComment() {
288 Advance(); 289 Advance();
289 290
290 // The line terminator at the end of the line is not considered 291 // The line terminator at the end of the line is not considered
291 // to be part of the single-line comment; it is recognized 292 // to be part of the single-line comment; it is recognized
292 // separately by the lexical grammar and becomes part of the 293 // separately by the lexical grammar and becomes part of the
293 // stream of input elements for the syntactic grammar (see 294 // stream of input elements for the syntactic grammar (see
294 // ECMA-262, section 7.4, page 12). 295 // ECMA-262, section 7.4, page 12).
295 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 296 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
296 Advance(); 297 Advance();
297 } 298 }
298 299
299 return Token::COMMENT; 300 return Token::WHITESPACE;
300 } 301 }
301 302
302 303
303 Token::Value Scanner::SkipMultiLineComment() { 304 Token::Value Scanner::SkipMultiLineComment() {
304 ASSERT(c0_ == '*'); 305 ASSERT(c0_ == '*');
305 Advance(); 306 Advance();
306 307
307 while (c0_ >= 0) { 308 while (c0_ >= 0) {
308 char ch = c0_; 309 char ch = c0_;
309 Advance(); 310 Advance();
310 // If we have reached the end of the multi-line comment, we 311 // If we have reached the end of the multi-line comment, we
311 // consume the '/' and insert a whitespace. This way all 312 // consume the '/' and insert a whitespace. This way all
312 // multi-line comments are treated as whitespace - even the ones 313 // multi-line comments are treated as whitespace - even the ones
313 // containing line terminators. This contradicts ECMA-262, section 314 // containing line terminators. This contradicts ECMA-262, section
314 // 7.4, page 12, that says that multi-line comments containing 315 // 7.4, page 12, that says that multi-line comments containing
315 // line terminators should be treated as a line terminator, but it 316 // line terminators should be treated as a line terminator, but it
316 // matches the behaviour of SpiderMonkey and KJS. 317 // matches the behaviour of SpiderMonkey and KJS.
317 if (ch == '*' && c0_ == '/') { 318 if (ch == '*' && c0_ == '/') {
318 c0_ = ' '; 319 c0_ = ' ';
319 return Token::COMMENT; 320 return Token::WHITESPACE;
320 } 321 }
321 } 322 }
322 323
323 // Unterminated multi-line comment. 324 // Unterminated multi-line comment.
324 return Token::ILLEGAL; 325 return Token::ILLEGAL;
325 } 326 }
326 327
327 328
328 Token::Value Scanner::ScanHtmlComment() { 329 Token::Value Scanner::ScanHtmlComment() {
329 // Check for <!-- comments. 330 // Check for <!-- comments.
330 ASSERT(c0_ == '!'); 331 ASSERT(c0_ == '!');
331 Advance(); 332 Advance();
332 if (c0_ == '-') { 333 if (c0_ == '-') {
333 Advance(); 334 Advance();
334 if (c0_ == '-') return SkipSingleLineComment(); 335 if (c0_ == '-') return SkipSingleLineComment();
335 PushBack('-'); // undo Advance() 336 PushBack('-'); // undo Advance()
336 } 337 }
337 PushBack('!'); // undo Advance() 338 PushBack('!'); // undo Advance()
338 ASSERT(c0_ == '!'); 339 ASSERT(c0_ == '!');
339 return Token::LT; 340 return Token::LT;
340 } 341 }
341 342
342 343
343 void Scanner::Scan() { 344 void Scanner::Scan() {
344 Token::Value token; 345 Token::Value token;
345 bool has_line_terminator = false; 346 has_line_terminator_before_next_ = false;
346 do { 347 do {
347 SkipWhiteSpace(has_line_terminator);
348
349 // Remember the line terminator in previous loop
350 has_line_terminator = has_line_terminator_before_next();
351
352 // Remember the position of the next token 348 // Remember the position of the next token
353 next_.location.beg_pos = source_pos(); 349 next_.location.beg_pos = source_pos();
354 350
355 token = ScanToken(); 351 switch (c0_) {
356 } while (token == Token::COMMENT); 352 case ' ':
353 case '\t':
354 Advance();
355 token = Token::WHITESPACE;
356 break;
357
358 case '\n':
359 Advance();
360 has_line_terminator_before_next_ = true;
361 token = Token::WHITESPACE;
362 break;
363
364 case '"': case '\'':
365 token = ScanString();
366 break;
367
368 case '<':
369 // < <= << <<= <!--
370 Advance();
371 if (c0_ == '=') {
372 token = Select(Token::LTE);
373 } else if (c0_ == '<') {
374 token = Select('=', Token::ASSIGN_SHL, Token::SHL);
375 } else if (c0_ == '!') {
376 token = ScanHtmlComment();
377 } else {
378 token = Token::LT;
379 }
380 break;
381
382 case '>':
383 // > >= >> >>= >>> >>>=
384 Advance();
385 if (c0_ == '=') {
386 token = Select(Token::GTE);
387 } else if (c0_ == '>') {
388 // >> >>= >>> >>>=
389 Advance();
390 if (c0_ == '=') {
391 token = Select(Token::ASSIGN_SAR);
392 } else if (c0_ == '>') {
393 token = Select('=', Token::ASSIGN_SHR, Token::SHR);
394 } else {
395 token = Token::SAR;
396 }
397 } else {
398 token = Token::GT;
399 }
400 break;
401
402 case '=':
403 // = == ===
404 Advance();
405 if (c0_ == '=') {
406 token = Select('=', Token::EQ_STRICT, Token::EQ);
407 } else {
408 token = Token::ASSIGN;
409 }
410 break;
411
412 case '!':
413 // ! != !==
414 Advance();
415 if (c0_ == '=') {
416 token = Select('=', Token::NE_STRICT, Token::NE);
417 } else {
418 token = Token::NOT;
419 }
420 break;
421
422 case '+':
423 // + ++ +=
424 Advance();
425 if (c0_ == '+') {
426 token = Select(Token::INC);
427 } else if (c0_ == '=') {
428 token = Select(Token::ASSIGN_ADD);
429 } else {
430 token = Token::ADD;
431 }
432 break;
433
434 case '-':
435 // - -- --> -=
436 Advance();
437 if (c0_ == '-') {
438 Advance();
439 if (c0_ == '>' && has_line_terminator_before_next_) {
440 // For compatibility with SpiderMonkey, we skip lines that
441 // start with an HTML comment end '-->'.
442 token = SkipSingleLineComment();
443 } else {
444 token = Token::DEC;
445 }
446 } else if (c0_ == '=') {
447 token = Select(Token::ASSIGN_SUB);
448 } else {
449 token = Token::SUB;
450 }
451 break;
452
453 case '*':
454 // * *=
455 token = Select('=', Token::ASSIGN_MUL, Token::MUL);
456 break;
457
458 case '%':
459 // % %=
460 token = Select('=', Token::ASSIGN_MOD, Token::MOD);
461 break;
462
463 case '/':
464 // / // /* /=
465 Advance();
466 if (c0_ == '/') {
467 token = SkipSingleLineComment();
468 } else if (c0_ == '*') {
469 token = SkipMultiLineComment();
470 } else if (c0_ == '=') {
471 token = Select(Token::ASSIGN_DIV);
472 } else {
473 token = Token::DIV;
474 }
475 break;
476
477 case '&':
478 // & && &=
479 Advance();
480 if (c0_ == '&') {
481 token = Select(Token::AND);
482 } else if (c0_ == '=') {
483 token = Select(Token::ASSIGN_BIT_AND);
484 } else {
485 token = Token::BIT_AND;
486 }
487 break;
488
489 case '|':
490 // | || |=
491 Advance();
492 if (c0_ == '|') {
493 token = Select(Token::OR);
494 } else if (c0_ == '=') {
495 token = Select(Token::ASSIGN_BIT_OR);
496 } else {
497 token = Token::BIT_OR;
498 }
499 break;
500
501 case '^':
502 // ^ ^=
503 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
504 break;
505
506 case '.':
507 // . Number
508 Advance();
509 if (IsDecimalDigit(c0_)) {
510 token = ScanNumber(true);
511 } else {
512 token = Token::PERIOD;
513 }
514 break;
515
516 case ':':
517 token = Select(Token::COLON);
518 break;
519
520 case ';':
521 token = Select(Token::SEMICOLON);
522 break;
523
524 case ',':
525 token = Select(Token::COMMA);
526 break;
527
528 case '(':
529 token = Select(Token::LPAREN);
530 break;
531
532 case ')':
533 token = Select(Token::RPAREN);
534 break;
535
536 case '[':
537 token = Select(Token::LBRACK);
538 break;
539
540 case ']':
541 token = Select(Token::RBRACK);
542 break;
543
544 case '{':
545 token = Select(Token::LBRACE);
546 break;
547
548 case '}':
549 token = Select(Token::RBRACE);
550 break;
551
552 case '?':
553 token = Select(Token::CONDITIONAL);
554 break;
555
556 case '~':
557 token = Select(Token::BIT_NOT);
558 break;
559
560 default:
561 if (kIsIdentifierStart.get(c0_)) {
562 token = ScanIdentifier();
563 } else if (IsDecimalDigit(c0_)) {
564 token = ScanNumber(false);
565 } else if (SkipWhiteSpace()) {
566 token = Token::WHITESPACE;
567 } else if (c0_ < 0) {
568 token = Token::EOS;
569 } else {
570 token = Select(Token::ILLEGAL);
571 }
572 break;
573 }
574
575 // Continue scanning for tokens as long as we're just skipping
576 // whitespace.
577 } while (token == Token::WHITESPACE);
357 578
358 next_.location.end_pos = source_pos(); 579 next_.location.end_pos = source_pos();
359 next_.token = token; 580 next_.token = token;
360 } 581 }
361 582
362 583
363 void Scanner::SeekForward(int pos) { 584 void Scanner::SeekForward(int pos) {
364 source_.SeekForward(pos - 1); 585 source_.SeekForward(pos - 1);
365 Advance(); 586 Advance();
366 Scan(); 587 Scan();
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
488 Advance(); 709 Advance();
489 if (c0_ == next) { 710 if (c0_ == next) {
490 Advance(); 711 Advance();
491 return then; 712 return then;
492 } else { 713 } else {
493 return else_; 714 return else_;
494 } 715 }
495 } 716 }
496 717
497 718
498 Token::Value Scanner::ScanToken() {
499 switch (c0_) {
500 // strings
501 case '"': case '\'':
502 return ScanString();
503
504 case '<':
505 // < <= << <<= <!--
506 Advance();
507 if (c0_ == '=') return Select(Token::LTE);
508 if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL);
509 if (c0_ == '!') return ScanHtmlComment();
510 return Token::LT;
511
512 case '>':
513 // > >= >> >>= >>> >>>=
514 Advance();
515 if (c0_ == '=') return Select(Token::GTE);
516 if (c0_ == '>') {
517 // >> >>= >>> >>>=
518 Advance();
519 if (c0_ == '=') return Select(Token::ASSIGN_SAR);
520 if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR);
521 return Token::SAR;
522 }
523 return Token::GT;
524
525 case '=':
526 // = == ===
527 Advance();
528 if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ);
529 return Token::ASSIGN;
530
531 case '!':
532 // ! != !==
533 Advance();
534 if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE);
535 return Token::NOT;
536
537 case '+':
538 // + ++ +=
539 Advance();
540 if (c0_ == '+') return Select(Token::INC);
541 if (c0_ == '=') return Select(Token::ASSIGN_ADD);
542 return Token::ADD;
543
544 case '-':
545 // - -- -=
546 Advance();
547 if (c0_ == '-') return Select(Token::DEC);
548 if (c0_ == '=') return Select(Token::ASSIGN_SUB);
549 return Token::SUB;
550
551 case '*':
552 // * *=
553 return Select('=', Token::ASSIGN_MUL, Token::MUL);
554
555 case '%':
556 // % %=
557 return Select('=', Token::ASSIGN_MOD, Token::MOD);
558
559 case '/':
560 // / // /* /=
561 Advance();
562 if (c0_ == '/') return SkipSingleLineComment();
563 if (c0_ == '*') return SkipMultiLineComment();
564 if (c0_ == '=') return Select(Token::ASSIGN_DIV);
565 return Token::DIV;
566
567 case '&':
568 // & && &=
569 Advance();
570 if (c0_ == '&') return Select(Token::AND);
571 if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND);
572 return Token::BIT_AND;
573
574 case '|':
575 // | || |=
576 Advance();
577 if (c0_ == '|') return Select(Token::OR);
578 if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR);
579 return Token::BIT_OR;
580
581 case '^':
582 // ^ ^=
583 return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
584
585 case '.':
586 // . Number
587 Advance();
588 if (IsDecimalDigit(c0_)) return ScanNumber(true);
589 return Token::PERIOD;
590
591 case ':':
592 return Select(Token::COLON);
593
594 case ';':
595 return Select(Token::SEMICOLON);
596
597 case ',':
598 return Select(Token::COMMA);
599
600 case '(':
601 return Select(Token::LPAREN);
602
603 case ')':
604 return Select(Token::RPAREN);
605
606 case '[':
607 return Select(Token::LBRACK);
608
609 case ']':
610 return Select(Token::RBRACK);
611
612 case '{':
613 return Select(Token::LBRACE);
614
615 case '}':
616 return Select(Token::RBRACE);
617
618 case '?':
619 return Select(Token::CONDITIONAL);
620
621 case '~':
622 return Select(Token::BIT_NOT);
623
624 default:
625 if (kIsIdentifierStart.get(c0_))
626 return ScanIdentifier();
627 if (IsDecimalDigit(c0_))
628 return ScanNumber(false);
629 if (c0_ < 0)
630 return Token::EOS;
631 return Select(Token::ILLEGAL);
632 }
633
634 UNREACHABLE();
635 return Token::ILLEGAL;
636 }
637
638
639 // Consumes a run of decimal digits, adding each via AddCharAdvance(); returns nothing. 719 // Consumes a run of decimal digits, adding each via AddCharAdvance(); returns nothing.
640 void Scanner::ScanDecimalDigits() { 720 void Scanner::ScanDecimalDigits() {
641 while (IsDecimalDigit(c0_)) 721 while (IsDecimalDigit(c0_))
642 AddCharAdvance(); 722 AddCharAdvance();
643 } 723 }
644 724
645 725
646 Token::Value Scanner::ScanNumber(bool seen_period) { 726 Token::Value Scanner::ScanNumber(bool seen_period) {
647 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 727 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
648 728
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
727 uc32 c = ScanHexEscape('u', 4); 807 uc32 c = ScanHexEscape('u', 4);
728 // We do not allow a unicode escape sequence to start another 808 // We do not allow a unicode escape sequence to start another
729 // unicode escape sequence. 809 // unicode escape sequence.
730 if (c == '\\') return unibrow::Utf8::kBadChar; 810 if (c == '\\') return unibrow::Utf8::kBadChar;
731 return c; 811 return c;
732 } 812 }
733 813
734 814
735 Token::Value Scanner::ScanIdentifier() { 815 Token::Value Scanner::ScanIdentifier() {
736 ASSERT(kIsIdentifierStart.get(c0_)); 816 ASSERT(kIsIdentifierStart.get(c0_));
737
738 bool has_escapes = false; 817 bool has_escapes = false;
739 818
740 StartLiteral(); 819 StartLiteral();
741 // Scan identifier start character. 820 // Scan identifier start character.
742 if (c0_ == '\\') { 821 if (c0_ == '\\') {
743 has_escapes = true; 822 has_escapes = true;
744 uc32 c = ScanIdentifierUnicodeEscape(); 823 uc32 c = ScanIdentifierUnicodeEscape();
745 // Only allow legal identifier start characters. 824 // Only allow legal identifier start characters.
746 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; 825 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
747 AddChar(c); 826 AddChar(c);
748 } else { 827 } else {
749 AddCharAdvance(); 828 AddChar(c0_);
829 Advance();
750 } 830 }
831
751 // Scan the rest of the identifier characters. 832 // Scan the rest of the identifier characters.
752 while (kIsIdentifierPart.get(c0_)) { 833 while (kIsIdentifierPart.get(c0_)) {
753 if (c0_ == '\\') { 834 if (c0_ == '\\') {
754 has_escapes = true; 835 has_escapes = true;
755 uc32 c = ScanIdentifierUnicodeEscape(); 836 uc32 c = ScanIdentifierUnicodeEscape();
756 // Only allow legal identifier part characters. 837 // Only allow legal identifier part characters.
757 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; 838 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
758 AddChar(c); 839 AddChar(c);
759 } else { 840 } else {
760 AddCharAdvance(); 841 AddChar(c0_);
842 Advance();
761 } 843 }
762 } 844 }
763 TerminateLiteral(); 845 TerminateLiteral();
764 846
765 // We don't have any 1-letter keywords (this is probably a common case). 847 // We don't have any 1-letter keywords (this is probably a common case).
766 if ((next_.literal_end - next_.literal_pos) == 1) 848 if ((next_.literal_end - next_.literal_pos) == 1) {
767 return Token::IDENTIFIER; 849 return Token::IDENTIFIER;
850 }
768 851
769 // If the identifier contains unicode escapes, it must not be 852 // If the identifier contains unicode escapes, it must not be
770 // resolved to a keyword. 853 // resolved to a keyword.
771 if (has_escapes) 854 if (has_escapes) {
772 return Token::IDENTIFIER; 855 return Token::IDENTIFIER;
856 }
773 857
774 return Token::Lookup(&literals_.data()[next_.literal_pos]); 858 return Token::Lookup(&literals_.data()[next_.literal_pos]);
775 } 859 }
776 860
777 861
778 862
779 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { 863 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
780 // Checks whether the buffer contains an identifier (no escape). 864 // Checks whether the buffer contains an identifier (no escape).
781 if (!buffer->has_more()) return false; 865 if (!buffer->has_more()) return false;
782 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; 866 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
841 } 925 }
842 AddCharAdvance(); 926 AddCharAdvance();
843 } 927 }
844 TerminateLiteral(); 928 TerminateLiteral();
845 929
846 next_.location.end_pos = source_pos() - 1; 930 next_.location.end_pos = source_pos() - 1;
847 return true; 931 return true;
848 } 932 }
849 933
850 } } // namespace v8::internal 934 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698