Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/scanner-base.cc

Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base. (Closed)
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner-base.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 11 matching lines...) Expand all
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 // Features shared by parsing and pre-parsing scanners. 28 // Features shared by parsing and pre-parsing scanners.
29 29
30 #include "../include/v8stdint.h" 30 #include "../include/v8stdint.h"
31 #include "scanner-base.h" 31 #include "scanner-base.h"
32 #include "char-predicates-inl.h"
32 33
33 namespace v8 { 34 namespace v8 {
34 namespace internal { 35 namespace internal {
35 36
36 // ---------------------------------------------------------------------------- 37 // ----------------------------------------------------------------------------
38 // UTF16Buffer
39
40 UTF16Buffer::UTF16Buffer()
41 : pos_(0), end_(kNoEndPosition) { }
42
43 // ----------------------------------------------------------------------------
44 // LiteralCollector
45
46 LiteralCollector::LiteralCollector()
47 : buffer_(kInitialCapacity), recording_(false) { }
48
49
50 LiteralCollector::~LiteralCollector() {}
51
52
53 void LiteralCollector::AddCharSlow(uc32 c) {
54 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
55 int length = unibrow::Utf8::Length(c);
56 Vector<char> block = buffer_.AddBlock(length, '\0');
57 #ifdef DEBUG
58 int written_length = unibrow::Utf8::Encode(block.start(), c);
59 CHECK_EQ(length, written_length);
60 #else
61 unibrow::Utf8::Encode(block.start(), c);
62 #endif
63 }
64
65 // ----------------------------------------------------------------------------
37 // Character predicates 66 // Character predicates
38 67
// Out-of-class definitions of the static ScannerConstants predicate members
// (identifier start, identifier part, white space, line terminator) that the
// scanner functions in this file query through .get().
// NOTE(review): the meaning of the template argument 128 is defined by
// unibrow::Predicate and is not visible here.
39 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; 68 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;
40 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; 69 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;
41 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; 70 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;
42 unibrow::Predicate<unibrow::LineTerminator, 128> 71 unibrow::Predicate<unibrow::LineTerminator, 128>
43 ScannerConstants::kIsLineTerminator; 72 ScannerConstants::kIsLineTerminator;
44 73
// Out-of-class definition of the static utf8_decoder_ member.
45 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; 74 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;
46 75
47 // Compound predicates. 76 // Compound predicates.
48 77
49 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { 78 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) {
50 // Checks whether the buffer contains an identifier (no escape). 79 // Checks whether the buffer contains an identifier (no escape).
51 if (!buffer->has_more()) return false; 80 if (!buffer->has_more()) return false;
52 if (!kIsIdentifierStart.get(buffer->GetNext())) { 81 if (!kIsIdentifierStart.get(buffer->GetNext())) {
53 return false; 82 return false;
54 } 83 }
55 while (buffer->has_more()) { 84 while (buffer->has_more()) {
56 if (!kIsIdentifierPart.get(buffer->GetNext())) { 85 if (!kIsIdentifierPart.get(buffer->GetNext())) {
57 return false; 86 return false;
58 } 87 }
59 } 88 }
60 return true; 89 return true;
61 } 90 }
62 91
63 // ---------------------------------------------------------------------------- 92 // ----------------------------------------------------------------------------
93 // Scanner
94
95 Scanner::Scanner() : source_(NULL), stack_overflow_(false) {}
96
97
98 uc32 Scanner::ScanHexEscape(uc32 c, int length) {
99 ASSERT(length <= 4); // prevent overflow
100
101 uc32 digits[4];
102 uc32 x = 0;
103 for (int i = 0; i < length; i++) {
104 digits[i] = c0_;
105 int d = HexValue(c0_);
106 if (d < 0) {
107 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
108 // should be illegal, but other JS VMs just return the
109 // non-escaped version of the original character.
110
111 // Push back digits read, except the last one (in c0_).
112 for (int j = i-1; j >= 0; j--) {
113 PushBack(digits[j]);
114 }
115 // Notice: No handling of error - treat it as "\u"->"u".
116 return c;
117 }
118 x = x * 16 + d;
119 Advance();
120 }
121
122 return x;
123 }
124
125
126 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
127 // ECMA-262. Other JS VMs support them.
128 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
129 uc32 x = c - '0';
130 for (int i = 0; i < length; i++) {
131 int d = c0_ - '0';
132 if (d < 0 || d > 7) break;
133 int nx = x * 8 + d;
134 if (nx >= 256) break;
135 x = nx;
136 Advance();
137 }
138 return x;
139 }
140
141
142 // ----------------------------------------------------------------------------
143 // JavaScriptScanner
144
145 JavaScriptScanner::JavaScriptScanner()
146 : has_line_terminator_before_next_(false) {}
147
148
149 Token::Value JavaScriptScanner::Next() {
150 current_ = next_;
151 has_line_terminator_before_next_ = false;
152 Scan();
153 return current_.token;
154 }
155
156
157 static inline bool IsByteOrderMark(uc32 c) {
158 // The Unicode value U+FFFE is guaranteed never to be assigned as a
159 // Unicode character; this implies that in a Unicode context the
160 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
161 // character expressed in little-endian byte order (since it could
162 // not be a U+FFFE character expressed in big-endian byte
163 // order). Nevertheless, we check for it to be compatible with
164 // Spidermonkey.
165 return c == 0xFEFF || c == 0xFFFE;
166 }
167
168
169 bool JavaScriptScanner::SkipWhiteSpace() {
170 int start_position = source_pos();
171
172 while (true) {
173 // We treat byte-order marks (BOMs) as whitespace for better
174 // compatibility with Spidermonkey and other JavaScript engines.
175 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
176 // IsWhiteSpace() includes line terminators!
177 if (ScannerConstants::kIsLineTerminator.get(c0_)) {
178 // Ignore line terminators, but remember them. This is necessary
179 // for automatic semicolon insertion.
180 has_line_terminator_before_next_ = true;
181 }
182 Advance();
183 }
184
185 // If there is an HTML comment end '-->' at the beginning of a
186 // line (with only whitespace in front of it), we treat the rest
187 // of the line as a comment. This is in line with the way
188 // SpiderMonkey handles it.
189 if (c0_ == '-' && has_line_terminator_before_next_) {
190 Advance();
191 if (c0_ == '-') {
192 Advance();
193 if (c0_ == '>') {
194 // Treat the rest of the line as a comment.
195 SkipSingleLineComment();
196 // Continue skipping white space after the comment.
197 continue;
198 }
199 PushBack('-'); // undo Advance()
200 }
201 PushBack('-'); // undo Advance()
202 }
203 // Return whether or not we skipped any characters.
204 return source_pos() != start_position;
205 }
206 }
207
208
209 Token::Value JavaScriptScanner::SkipSingleLineComment() {
210 Advance();
211
212 // The line terminator at the end of the line is not considered
213 // to be part of the single-line comment; it is recognized
214 // separately by the lexical grammar and becomes part of the
215 // stream of input elements for the syntactic grammar (see
216 // ECMA-262, section 7.4, page 12).
217 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
218 Advance();
219 }
220
221 return Token::WHITESPACE;
222 }
223
224
225 Token::Value JavaScriptScanner::SkipMultiLineComment() {
226 ASSERT(c0_ == '*');
227 Advance();
228
229 while (c0_ >= 0) {
230 char ch = c0_;
231 Advance();
232 // If we have reached the end of the multi-line comment, we
233 // consume the '/' and insert a whitespace. This way all
234 // multi-line comments are treated as whitespace - even the ones
235 // containing line terminators. This contradicts ECMA-262, section
236 // 7.4, page 12, that says that multi-line comments containing
237 // line terminators should be treated as a line terminator, but it
238 // matches the behaviour of SpiderMonkey and KJS.
239 if (ch == '*' && c0_ == '/') {
240 c0_ = ' ';
241 return Token::WHITESPACE;
242 }
243 }
244
245 // Unterminated multi-line comment.
246 return Token::ILLEGAL;
247 }
248
249
250 Token::Value JavaScriptScanner::ScanHtmlComment() {
251 // Check for <!-- comments.
252 ASSERT(c0_ == '!');
253 Advance();
254 if (c0_ == '-') {
255 Advance();
256 if (c0_ == '-') return SkipSingleLineComment();
257 PushBack('-'); // undo Advance()
258 }
259 PushBack('!'); // undo Advance()
260 ASSERT(c0_ == '!');
261 return Token::LT;
262 }
263
264
265 void JavaScriptScanner::Scan() {
266 next_.literal_chars = Vector<const char>();
267 Token::Value token;
268 do {
269 // Remember the position of the next token
270 next_.location.beg_pos = source_pos();
271
272 switch (c0_) {
273 case ' ':
274 case '\t':
275 Advance();
276 token = Token::WHITESPACE;
277 break;
278
279 case '\n':
280 Advance();
281 has_line_terminator_before_next_ = true;
282 token = Token::WHITESPACE;
283 break;
284
285 case '"': case '\'':
286 token = ScanString();
287 break;
288
289 case '<':
290 // < <= << <<= <!--
291 Advance();
292 if (c0_ == '=') {
293 token = Select(Token::LTE);
294 } else if (c0_ == '<') {
295 token = Select('=', Token::ASSIGN_SHL, Token::SHL);
296 } else if (c0_ == '!') {
297 token = ScanHtmlComment();
298 } else {
299 token = Token::LT;
300 }
301 break;
302
303 case '>':
304 // > >= >> >>= >>> >>>=
305 Advance();
306 if (c0_ == '=') {
307 token = Select(Token::GTE);
308 } else if (c0_ == '>') {
309 // >> >>= >>> >>>=
310 Advance();
311 if (c0_ == '=') {
312 token = Select(Token::ASSIGN_SAR);
313 } else if (c0_ == '>') {
314 token = Select('=', Token::ASSIGN_SHR, Token::SHR);
315 } else {
316 token = Token::SAR;
317 }
318 } else {
319 token = Token::GT;
320 }
321 break;
322
323 case '=':
324 // = == ===
325 Advance();
326 if (c0_ == '=') {
327 token = Select('=', Token::EQ_STRICT, Token::EQ);
328 } else {
329 token = Token::ASSIGN;
330 }
331 break;
332
333 case '!':
334 // ! != !==
335 Advance();
336 if (c0_ == '=') {
337 token = Select('=', Token::NE_STRICT, Token::NE);
338 } else {
339 token = Token::NOT;
340 }
341 break;
342
343 case '+':
344 // + ++ +=
345 Advance();
346 if (c0_ == '+') {
347 token = Select(Token::INC);
348 } else if (c0_ == '=') {
349 token = Select(Token::ASSIGN_ADD);
350 } else {
351 token = Token::ADD;
352 }
353 break;
354
355 case '-':
356 // - -- --> -=
357 Advance();
358 if (c0_ == '-') {
359 Advance();
360 if (c0_ == '>' && has_line_terminator_before_next_) {
361 // For compatibility with SpiderMonkey, we skip lines that
362 // start with an HTML comment end '-->'.
363 token = SkipSingleLineComment();
364 } else {
365 token = Token::DEC;
366 }
367 } else if (c0_ == '=') {
368 token = Select(Token::ASSIGN_SUB);
369 } else {
370 token = Token::SUB;
371 }
372 break;
373
374 case '*':
375 // * *=
376 token = Select('=', Token::ASSIGN_MUL, Token::MUL);
377 break;
378
379 case '%':
380 // % %=
381 token = Select('=', Token::ASSIGN_MOD, Token::MOD);
382 break;
383
384 case '/':
385 // / // /* /=
386 Advance();
387 if (c0_ == '/') {
388 token = SkipSingleLineComment();
389 } else if (c0_ == '*') {
390 token = SkipMultiLineComment();
391 } else if (c0_ == '=') {
392 token = Select(Token::ASSIGN_DIV);
393 } else {
394 token = Token::DIV;
395 }
396 break;
397
398 case '&':
399 // & && &=
400 Advance();
401 if (c0_ == '&') {
402 token = Select(Token::AND);
403 } else if (c0_ == '=') {
404 token = Select(Token::ASSIGN_BIT_AND);
405 } else {
406 token = Token::BIT_AND;
407 }
408 break;
409
410 case '|':
411 // | || |=
412 Advance();
413 if (c0_ == '|') {
414 token = Select(Token::OR);
415 } else if (c0_ == '=') {
416 token = Select(Token::ASSIGN_BIT_OR);
417 } else {
418 token = Token::BIT_OR;
419 }
420 break;
421
422 case '^':
423 // ^ ^=
424 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
425 break;
426
427 case '.':
428 // . Number
429 Advance();
430 if (IsDecimalDigit(c0_)) {
431 token = ScanNumber(true);
432 } else {
433 token = Token::PERIOD;
434 }
435 break;
436
437 case ':':
438 token = Select(Token::COLON);
439 break;
440
441 case ';':
442 token = Select(Token::SEMICOLON);
443 break;
444
445 case ',':
446 token = Select(Token::COMMA);
447 break;
448
449 case '(':
450 token = Select(Token::LPAREN);
451 break;
452
453 case ')':
454 token = Select(Token::RPAREN);
455 break;
456
457 case '[':
458 token = Select(Token::LBRACK);
459 break;
460
461 case ']':
462 token = Select(Token::RBRACK);
463 break;
464
465 case '{':
466 token = Select(Token::LBRACE);
467 break;
468
469 case '}':
470 token = Select(Token::RBRACE);
471 break;
472
473 case '?':
474 token = Select(Token::CONDITIONAL);
475 break;
476
477 case '~':
478 token = Select(Token::BIT_NOT);
479 break;
480
481 default:
482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
483 token = ScanIdentifier();
484 } else if (IsDecimalDigit(c0_)) {
485 token = ScanNumber(false);
486 } else if (SkipWhiteSpace()) {
487 token = Token::WHITESPACE;
488 } else if (c0_ < 0) {
489 token = Token::EOS;
490 } else {
491 token = Select(Token::ILLEGAL);
492 }
493 break;
494 }
495
496 // Continue scanning for tokens as long as we're just skipping
497 // whitespace.
498 } while (token == Token::WHITESPACE);
499
500 next_.location.end_pos = source_pos();
501 next_.token = token;
502 }
503
504
505 void JavaScriptScanner::SeekForward(int pos) {
506 source_->SeekForward(pos - 1);
507 Advance();
508 // This function is only called to seek to the location
509 // of the end of a function (at the "}" token). It doesn't matter
510 // whether there was a line terminator in the part we skip.
511 has_line_terminator_before_next_ = false;
512 Scan();
513 }
514
515
516 void JavaScriptScanner::ScanEscape() {
517 uc32 c = c0_;
518 Advance();
519
520 // Skip escaped newlines.
521 if (ScannerConstants::kIsLineTerminator.get(c)) {
522 // Allow CR+LF newlines in multiline string literals.
523 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
524 // Allow LF+CR newlines in multiline string literals.
525 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
526 return;
527 }
528
529 switch (c) {
530 case '\'': // fall through
531 case '"' : // fall through
532 case '\\': break;
533 case 'b' : c = '\b'; break;
534 case 'f' : c = '\f'; break;
535 case 'n' : c = '\n'; break;
536 case 'r' : c = '\r'; break;
537 case 't' : c = '\t'; break;
538 case 'u' : c = ScanHexEscape(c, 4); break;
539 case 'v' : c = '\v'; break;
540 case 'x' : c = ScanHexEscape(c, 2); break;
541 case '0' : // fall through
542 case '1' : // fall through
543 case '2' : // fall through
544 case '3' : // fall through
545 case '4' : // fall through
546 case '5' : // fall through
547 case '6' : // fall through
548 case '7' : c = ScanOctalEscape(c, 2); break;
549 }
550
551 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
552 // should be illegal, but they are commonly handled
553 // as non-escaped characters by JS VMs.
554 AddLiteralChar(c);
555 }
556
557
558 Token::Value JavaScriptScanner::ScanString() {
559 uc32 quote = c0_;
560 Advance(); // consume quote
561
562 LiteralScope literal(this);
563 while (c0_ != quote && c0_ >= 0
564 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
565 uc32 c = c0_;
566 Advance();
567 if (c == '\\') {
568 if (c0_ < 0) return Token::ILLEGAL;
569 ScanEscape();
570 } else {
571 AddLiteralChar(c);
572 }
573 }
574 if (c0_ != quote) return Token::ILLEGAL;
575 literal.Complete();
576
577 Advance(); // consume quote
578 return Token::STRING;
579 }
580
581
582 void JavaScriptScanner::ScanDecimalDigits() {
583 while (IsDecimalDigit(c0_))
584 AddLiteralCharAdvance();
585 }
586
587
588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
590
591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
592
593 LiteralScope literal(this);
594 if (seen_period) {
595 // we have already seen a decimal point of the float
596 AddLiteralChar('.');
597 ScanDecimalDigits(); // we know we have at least one digit
598
599 } else {
600 // if the first character is '0' we must check for octals and hex
601 if (c0_ == '0') {
602 AddLiteralCharAdvance();
603
604 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
605 if (c0_ == 'x' || c0_ == 'X') {
606 // hex number
607 kind = HEX;
608 AddLiteralCharAdvance();
609 if (!IsHexDigit(c0_)) {
610 // we must have at least one hex digit after 'x'/'X'
611 return Token::ILLEGAL;
612 }
613 while (IsHexDigit(c0_)) {
614 AddLiteralCharAdvance();
615 }
616 } else if ('0' <= c0_ && c0_ <= '7') {
617 // (possible) octal number
618 kind = OCTAL;
619 while (true) {
620 if (c0_ == '8' || c0_ == '9') {
621 kind = DECIMAL;
622 break;
623 }
624 if (c0_ < '0' || '7' < c0_) break;
625 AddLiteralCharAdvance();
626 }
627 }
628 }
629
630 // Parse decimal digits and allow trailing fractional part.
631 if (kind == DECIMAL) {
632 ScanDecimalDigits(); // optional
633 if (c0_ == '.') {
634 AddLiteralCharAdvance();
635 ScanDecimalDigits(); // optional
636 }
637 }
638 }
639
640 // scan exponent, if any
641 if (c0_ == 'e' || c0_ == 'E') {
642 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
643 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
644 // scan exponent
645 AddLiteralCharAdvance();
646 if (c0_ == '+' || c0_ == '-')
647 AddLiteralCharAdvance();
648 if (!IsDecimalDigit(c0_)) {
649 // we must have at least one decimal digit after 'e'/'E'
650 return Token::ILLEGAL;
651 }
652 ScanDecimalDigits();
653 }
654
655 // The source character immediately following a numeric literal must
656 // not be an identifier start or a decimal digit; see ECMA-262
657 // section 7.8.3, page 17 (note that we read only one decimal digit
658 // if the value is 0).
659 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
660 return Token::ILLEGAL;
661
662 literal.Complete();
663
664 return Token::NUMBER;
665 }
666
667
668 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
669 Advance();
670 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
671 Advance();
672 uc32 c = ScanHexEscape('u', 4);
673 // We do not allow a unicode escape sequence to start another
674 // unicode escape sequence.
675 if (c == '\\') return unibrow::Utf8::kBadChar;
676 return c;
677 }
678
679
680 Token::Value JavaScriptScanner::ScanIdentifier() {
681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
682
683 LiteralScope literal(this);
684 KeywordMatcher keyword_match;
685
686 // Scan identifier start character.
687 if (c0_ == '\\') {
688 uc32 c = ScanIdentifierUnicodeEscape();
689 // Only allow legal identifier start characters.
690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
691 AddLiteralChar(c);
692 keyword_match.Fail();
693 } else {
694 AddLiteralChar(c0_);
695 keyword_match.AddChar(c0_);
696 Advance();
697 }
698
699 // Scan the rest of the identifier characters.
700 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
701 if (c0_ == '\\') {
702 uc32 c = ScanIdentifierUnicodeEscape();
703 // Only allow legal identifier part characters.
704 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
705 AddLiteralChar(c);
706 keyword_match.Fail();
707 } else {
708 AddLiteralChar(c0_);
709 keyword_match.AddChar(c0_);
710 Advance();
711 }
712 }
713 literal.Complete();
714
715 return keyword_match.token();
716 }
717
718
719
720 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
721 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
722 bool in_character_class = false;
723
724 // Previous token is either '/' or '/=', in the second case, the
725 // pattern starts at =.
726 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
727 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
728
729 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
730 // the scanner should pass uninterpreted bodies to the RegExp
731 // constructor.
732 LiteralScope literal(this);
733 if (seen_equal)
734 AddLiteralChar('=');
735
736 while (c0_ != '/' || in_character_class) {
737 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
738 if (c0_ == '\\') { // escaped character
739 AddLiteralCharAdvance();
740 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
741 AddLiteralCharAdvance();
742 } else { // unescaped character
743 if (c0_ == '[') in_character_class = true;
744 if (c0_ == ']') in_character_class = false;
745 AddLiteralCharAdvance();
746 }
747 }
748 Advance(); // consume '/'
749
750 literal.Complete();
751
752 return true;
753 }
754
755 bool JavaScriptScanner::ScanRegExpFlags() {
756 // Scan regular expression flags.
757 LiteralScope literal(this);
758 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
759 if (c0_ == '\\') {
760 uc32 c = ScanIdentifierUnicodeEscape();
761 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
762 // We allow any escaped character, unlike the restriction on
763 // IdentifierPart when it is used to build an IdentifierName.
764 AddLiteralChar(c);
765 continue;
766 }
767 }
768 AddLiteralCharAdvance();
769 }
770 literal.Complete();
771
772 next_.location.end_pos = source_pos() - 1;
773 return true;
774 }
775
776 // ----------------------------------------------------------------------------
64 // Keyword Matcher 777 // Keyword Matcher
65 778
66 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { 779 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
67 { "break", KEYWORD_PREFIX, Token::BREAK }, 780 { "break", KEYWORD_PREFIX, Token::BREAK },
68 { NULL, C, Token::ILLEGAL }, 781 { NULL, C, Token::ILLEGAL },
69 { NULL, D, Token::ILLEGAL }, 782 { NULL, D, Token::ILLEGAL },
70 { "else", KEYWORD_PREFIX, Token::ELSE }, 783 { "else", KEYWORD_PREFIX, Token::ELSE },
71 { NULL, F, Token::ILLEGAL }, 784 { NULL, F, Token::ILLEGAL },
72 { NULL, UNMATCHABLE, Token::ILLEGAL }, 785 { NULL, UNMATCHABLE, Token::ILLEGAL },
73 { NULL, UNMATCHABLE, Token::ILLEGAL }, 786 { NULL, UNMATCHABLE, Token::ILLEGAL },
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
186 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; 899 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
187 break; 900 break;
188 case UNMATCHABLE: 901 case UNMATCHABLE:
189 break; 902 break;
190 } 903 }
191 // On fallthrough, it's a failure. 904 // On fallthrough, it's a failure.
192 state_ = UNMATCHABLE; 905 state_ = UNMATCHABLE;
193 } 906 }
194 907
195 } } // namespace v8::internal 908 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner-base.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698