src/scanner-base.cc - Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base.

Side by Side Diff: src/scanner-base.cc

Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base. (Closed)

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 11 matching lines...) Expand all Loading...
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 // Features shared by parsing and pre-parsing scanners.	28 // Features shared by parsing and pre-parsing scanners.

29	29

30 #include "../include/v8stdint.h"	30 #include "../include/v8stdint.h"

31 #include "scanner-base.h"	31 #include "scanner-base.h"

	32 #include "char-predicates-inl.h"

32	33

33 namespace v8 {	34 namespace v8 {

34 namespace internal {	35 namespace internal {

35	36

36 // ----------------------------------------------------------------------------	37 // ----------------------------------------------------------------------------

	38 // UTF16Buffer

	39

	40 UTF16Buffer::UTF16Buffer()

	41 : pos_(0), end_(kNoEndPosition) { }

	42

	43 // ----------------------------------------------------------------------------

	44 // LiteralCollector

	45

	46 LiteralCollector::LiteralCollector()

	47 : buffer_(kInitialCapacity), recording_(false) { }

	48

	49

	50 LiteralCollector::~LiteralCollector() {}

	51

	52

	53 void LiteralCollector::AddCharSlow(uc32 c) {

	54 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);

	55 int length = unibrow::Utf8::Length(c);

	56 Vector<char> block = buffer_.AddBlock(length, '\0');

	57 #ifdef DEBUG

	58 int written_length = unibrow::Utf8::Encode(block.start(), c);

	59 CHECK_EQ(length, written_length);

	60 #else

	61 unibrow::Utf8::Encode(block.start(), c);

	62 #endif

	63 }

	64

	65 // ----------------------------------------------------------------------------

37 // Character predicates	66 // Character predicates

38	67

39 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;	68 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;

40 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;	69 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;

41 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;	70 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;

42 unibrow::Predicate<unibrow::LineTerminator, 128>	71 unibrow::Predicate<unibrow::LineTerminator, 128>

43 ScannerConstants::kIsLineTerminator;	72 ScannerConstants::kIsLineTerminator;

44	73

45 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;	74 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;

46	75

47 // Compound predicates.	76 // Compound predicates.

48	77

49 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) {	78 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) {

50 // Checks whether the buffer contains an identifier (no escape).	79 // Checks whether the buffer contains an identifier (no escape).

51 if (!buffer->has_more()) return false;	80 if (!buffer->has_more()) return false;

52 if (!kIsIdentifierStart.get(buffer->GetNext())) {	81 if (!kIsIdentifierStart.get(buffer->GetNext())) {

53 return false;	82 return false;

54 }	83 }

55 while (buffer->has_more()) {	84 while (buffer->has_more()) {

56 if (!kIsIdentifierPart.get(buffer->GetNext())) {	85 if (!kIsIdentifierPart.get(buffer->GetNext())) {

57 return false;	86 return false;

58 }	87 }

59 }	88 }

60 return true;	89 return true;

61 }	90 }

62	91

63 // ----------------------------------------------------------------------------	92 // ----------------------------------------------------------------------------

	93 // Scanner

	94

	95 Scanner::Scanner() : source_(NULL), stack_overflow_(false) {}

	96

	97

	98 uc32 Scanner::ScanHexEscape(uc32 c, int length) {

	99 ASSERT(length <= 4); // prevent overflow

	100

	101 uc32 digits[4];

	102 uc32 x = 0;

	103 for (int i = 0; i < length; i++) {

	104 digits[i] = c0_;

	105 int d = HexValue(c0_);

	106 if (d < 0) {

	107 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

	108 // should be illegal, but other JS VMs just return the

	109 // non-escaped version of the original character.

	110

	111 // Push back digits read, except the last one (in c0_).

	112 for (int j = i-1; j >= 0; j--) {

	113 PushBack(digits[j]);

	114 }

	115 // Notice: No handling of error - treat it as "\u"->"u".

	116 return c;

	117 }

	118 x = x * 16 + d;

	119 Advance();

	120 }

	121

	122 return x;

	123 }

	124

	125

	126 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

	127 // ECMA-262. Other JS VMs support them.

	128 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {

	129 uc32 x = c - '0';

	130 for (int i = 0; i < length; i++) {

	131 int d = c0_ - '0';

	132 if (d < 0 \|\| d > 7) break;

	133 int nx = x * 8 + d;

	134 if (nx >= 256) break;

	135 x = nx;

	136 Advance();

	137 }

	138 return x;

	139 }

	140

	141

	142 // ----------------------------------------------------------------------------

	143 // JavaScriptScanner

	144

	145 JavaScriptScanner::JavaScriptScanner()

	146 : has_line_terminator_before_next_(false) {}

	147

	148

	149 Token::Value JavaScriptScanner::Next() {

	150 current_ = next_;

	151 has_line_terminator_before_next_ = false;

	152 Scan();

	153 return current_.token;

	154 }

	155

	156

	157 static inline bool IsByteOrderMark(uc32 c) {

	158 // The Unicode value U+FFFE is guaranteed never to be assigned as a

	159 // Unicode character; this implies that in a Unicode context the

	160 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

	161 // character expressed in little-endian byte order (since it could

	162 // not be a U+FFFE character expressed in big-endian byte

	163 // order). Nevertheless, we check for it to be compatible with

	164 // Spidermonkey.

	165 return c == 0xFEFF \|\| c == 0xFFFE;

	166 }

	167

	168

	169 bool JavaScriptScanner::SkipWhiteSpace() {

	170 int start_position = source_pos();

	171

	172 while (true) {

	173 // We treat byte-order marks (BOMs) as whitespace for better

	174 // compatibility with Spidermonkey and other JavaScript engines.

	175 while (ScannerConstants::kIsWhiteSpace.get(c0_) \|\| IsByteOrderMark(c0_)) {

	176 // IsWhiteSpace() includes line terminators!

	177 if (ScannerConstants::kIsLineTerminator.get(c0_)) {

	178 // Ignore line terminators, but remember them. This is necessary

	179 // for automatic semicolon insertion.

	180 has_line_terminator_before_next_ = true;

	181 }

	182 Advance();

	183 }

	184

	185 // If there is an HTML comment end '-->' at the beginning of a

	186 // line (with only whitespace in front of it), we treat the rest

	187 // of the line as a comment. This is in line with the way

	188 // SpiderMonkey handles it.

	189 if (c0_ == '-' && has_line_terminator_before_next_) {

	190 Advance();

	191 if (c0_ == '-') {

	192 Advance();

	193 if (c0_ == '>') {

	194 // Treat the rest of the line as a comment.

	195 SkipSingleLineComment();

	196 // Continue skipping white space after the comment.

	197 continue;

	198 }

	199 PushBack('-'); // undo Advance()

	200 }

	201 PushBack('-'); // undo Advance()

	202 }

	203 // Return whether or not we skipped any characters.

	204 return source_pos() != start_position;

	205 }

	206 }

	207

	208

	209 Token::Value JavaScriptScanner::SkipSingleLineComment() {

	210 Advance();

	211

	212 // The line terminator at the end of the line is not considered

	213 // to be part of the single-line comment; it is recognized

	214 // separately by the lexical grammar and becomes part of the

	215 // stream of input elements for the syntactic grammar (see

	216 // ECMA-262, section 7.4, page 12).

	217 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

	218 Advance();

	219 }

	220

	221 return Token::WHITESPACE;

	222 }

	223

	224

	225 Token::Value JavaScriptScanner::SkipMultiLineComment() {

	226 ASSERT(c0_ == '*');

	227 Advance();

	228

	229 while (c0_ >= 0) {

	230 char ch = c0_;

	231 Advance();

	232 // If we have reached the end of the multi-line comment, we

	233 // consume the '/' and insert a whitespace. This way all

	234 // multi-line comments are treated as whitespace - even the ones

	235 // containing line terminators. This contradicts ECMA-262, section

	236 // 7.4, page 12, that says that multi-line comments containing

	237 // line terminators should be treated as a line terminator, but it

	238 // matches the behaviour of SpiderMonkey and KJS.

	239 if (ch == '*' && c0_ == '/') {

	240 c0_ = ' ';

	241 return Token::WHITESPACE;

	242 }

	243 }

	244

	245 // Unterminated multi-line comment.

	246 return Token::ILLEGAL;

	247 }

	248

	249

	250 Token::Value JavaScriptScanner::ScanHtmlComment() {

	251 // Check for <!-- comments.

	252 ASSERT(c0_ == '!');

	253 Advance();

	254 if (c0_ == '-') {

	255 Advance();

	256 if (c0_ == '-') return SkipSingleLineComment();

	257 PushBack('-'); // undo Advance()

	258 }

	259 PushBack('!'); // undo Advance()

	260 ASSERT(c0_ == '!');

	261 return Token::LT;

	262 }

	263

	264

	265 void JavaScriptScanner::Scan() {

	266 next_.literal_chars = Vector<const char>();

	267 Token::Value token;

	268 do {

	269 // Remember the position of the next token

	270 next_.location.beg_pos = source_pos();

	271

	272 switch (c0_) {

	273 case ' ':

	274 case '\t':

	275 Advance();

	276 token = Token::WHITESPACE;

	277 break;

	278

	279 case '\n':

	280 Advance();

	281 has_line_terminator_before_next_ = true;

	282 token = Token::WHITESPACE;

	283 break;

	284

	285 case '"': case '\'':

	286 token = ScanString();

	287 break;

	288

	289 case '<':

	290 // < <= << <<= <!--

	291 Advance();

	292 if (c0_ == '=') {

	293 token = Select(Token::LTE);

	294 } else if (c0_ == '<') {

	295 token = Select('=', Token::ASSIGN_SHL, Token::SHL);

	296 } else if (c0_ == '!') {

	297 token = ScanHtmlComment();

	298 } else {

	299 token = Token::LT;

	300 }

	301 break;

	302

	303 case '>':

	304 // > >= >> >>= >>> >>>=

	305 Advance();

	306 if (c0_ == '=') {

	307 token = Select(Token::GTE);

	308 } else if (c0_ == '>') {

	309 // >> >>= >>> >>>=

	310 Advance();

	311 if (c0_ == '=') {

	312 token = Select(Token::ASSIGN_SAR);

	313 } else if (c0_ == '>') {

	314 token = Select('=', Token::ASSIGN_SHR, Token::SHR);

	315 } else {

	316 token = Token::SAR;

	317 }

	318 } else {

	319 token = Token::GT;

	320 }

	321 break;

	322

	323 case '=':

	324 // = == ===

	325 Advance();

	326 if (c0_ == '=') {

	327 token = Select('=', Token::EQ_STRICT, Token::EQ);

	328 } else {

	329 token = Token::ASSIGN;

	330 }

	331 break;

	332

	333 case '!':

	334 // ! != !==

	335 Advance();

	336 if (c0_ == '=') {

	337 token = Select('=', Token::NE_STRICT, Token::NE);

	338 } else {

	339 token = Token::NOT;

	340 }

	341 break;

	342

	343 case '+':

	344 // + ++ +=

	345 Advance();

	346 if (c0_ == '+') {

	347 token = Select(Token::INC);

	348 } else if (c0_ == '=') {

	349 token = Select(Token::ASSIGN_ADD);

	350 } else {

	351 token = Token::ADD;

	352 }

	353 break;

	354

	355 case '-':

	356 // - -- --> -=

	357 Advance();

	358 if (c0_ == '-') {

	359 Advance();

	360 if (c0_ == '>' && has_line_terminator_before_next_) {

	361 // For compatibility with SpiderMonkey, we skip lines that

	362 // start with an HTML comment end '-->'.

	363 token = SkipSingleLineComment();

	364 } else {

	365 token = Token::DEC;

	366 }

	367 } else if (c0_ == '=') {

	368 token = Select(Token::ASSIGN_SUB);

	369 } else {

	370 token = Token::SUB;

	371 }

	372 break;

	373

	374 case '*':

	375 // * *=

	376 token = Select('=', Token::ASSIGN_MUL, Token::MUL);

	377 break;

	378

	379 case '%':

	380 // % %=

	381 token = Select('=', Token::ASSIGN_MOD, Token::MOD);

	382 break;

	383

	384 case '/':

	385 // / // /* /=

	386 Advance();

	387 if (c0_ == '/') {

	388 token = SkipSingleLineComment();

	389 } else if (c0_ == '*') {

	390 token = SkipMultiLineComment();

	391 } else if (c0_ == '=') {

	392 token = Select(Token::ASSIGN_DIV);

	393 } else {

	394 token = Token::DIV;

	395 }

	396 break;

	397

	398 case '&':

	399 // & && &=

	400 Advance();

	401 if (c0_ == '&') {

	402 token = Select(Token::AND);

	403 } else if (c0_ == '=') {

	404 token = Select(Token::ASSIGN_BIT_AND);

	405 } else {

	406 token = Token::BIT_AND;

	407 }

	408 break;

	409

	410 case '\|':

	411 // \| \|\| \|=

	412 Advance();

	413 if (c0_ == '\|') {

	414 token = Select(Token::OR);

	415 } else if (c0_ == '=') {

	416 token = Select(Token::ASSIGN_BIT_OR);

	417 } else {

	418 token = Token::BIT_OR;

	419 }

	420 break;

	421

	422 case '^':

	423 // ^ ^=

	424 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);

	425 break;

	426

	427 case '.':

	428 // . Number

	429 Advance();

	430 if (IsDecimalDigit(c0_)) {

	431 token = ScanNumber(true);

	432 } else {

	433 token = Token::PERIOD;

	434 }

	435 break;

	436

	437 case ':':

	438 token = Select(Token::COLON);

	439 break;

	440

	441 case ';':

	442 token = Select(Token::SEMICOLON);

	443 break;

	444

	445 case ',':

	446 token = Select(Token::COMMA);

	447 break;

	448

	449 case '(':

	450 token = Select(Token::LPAREN);

	451 break;

	452

	453 case ')':

	454 token = Select(Token::RPAREN);

	455 break;

	456

	457 case '[':

	458 token = Select(Token::LBRACK);

	459 break;

	460

	461 case ']':

	462 token = Select(Token::RBRACK);

	463 break;

	464

	465 case '{':

	466 token = Select(Token::LBRACE);

	467 break;

	468

	469 case '}':

	470 token = Select(Token::RBRACE);

	471 break;

	472

	473 case '?':

	474 token = Select(Token::CONDITIONAL);

	475 break;

	476

	477 case '~':

	478 token = Select(Token::BIT_NOT);

	479 break;

	480

	481 default:

	482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {

	483 token = ScanIdentifier();

	484 } else if (IsDecimalDigit(c0_)) {

	485 token = ScanNumber(false);

	486 } else if (SkipWhiteSpace()) {

	487 token = Token::WHITESPACE;

	488 } else if (c0_ < 0) {

	489 token = Token::EOS;

	490 } else {

	491 token = Select(Token::ILLEGAL);

	492 }

	493 break;

	494 }

	495

	496 // Continue scanning for tokens as long as we're just skipping

	497 // whitespace.

	498 } while (token == Token::WHITESPACE);

	499

	500 next_.location.end_pos = source_pos();

	501 next_.token = token;

	502 }

	503

	504

	505 void JavaScriptScanner::SeekForward(int pos) {

	506 source_->SeekForward(pos - 1);

	507 Advance();

	508 // This function is only called to seek to the location

	509 // of the end of a function (at the "}" token). It doesn't matter

	510 // whether there was a line terminator in the part we skip.

	511 has_line_terminator_before_next_ = false;

	512 Scan();

	513 }

	514

	515

	516 void JavaScriptScanner::ScanEscape() {

	517 uc32 c = c0_;

	518 Advance();

	519

	520 // Skip escaped newlines.

	521 if (ScannerConstants::kIsLineTerminator.get(c)) {

	522 // Allow CR+LF newlines in multiline string literals.

	523 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

	524 // Allow LF+CR newlines in multiline string literals.

	525 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

	526 return;

	527 }

	528

	529 switch (c) {

	530 case '\'': // fall through

	531 case '"' : // fall through

	532 case '\\': break;

	533 case 'b' : c = '\b'; break;

	534 case 'f' : c = '\f'; break;

	535 case 'n' : c = '\n'; break;

	536 case 'r' : c = '\r'; break;

	537 case 't' : c = '\t'; break;

	538 case 'u' : c = ScanHexEscape(c, 4); break;

	539 case 'v' : c = '\v'; break;

	540 case 'x' : c = ScanHexEscape(c, 2); break;

	541 case '0' : // fall through

	542 case '1' : // fall through

	543 case '2' : // fall through

	544 case '3' : // fall through

	545 case '4' : // fall through

	546 case '5' : // fall through

	547 case '6' : // fall through

	548 case '7' : c = ScanOctalEscape(c, 2); break;

	549 }

	550

	551 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these

	552 // should be illegal, but they are commonly handled

	553 // as non-escaped characters by JS VMs.

	554 AddLiteralChar(c);

	555 }

	556

	557

	558 Token::Value JavaScriptScanner::ScanString() {

	559 uc32 quote = c0_;

	560 Advance(); // consume quote

	561

	562 LiteralScope literal(this);

	563 while (c0_ != quote && c0_ >= 0

	564 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

	565 uc32 c = c0_;

	566 Advance();

	567 if (c == '\\') {

	568 if (c0_ < 0) return Token::ILLEGAL;

	569 ScanEscape();

	570 } else {

	571 AddLiteralChar(c);

	572 }

	573 }

	574 if (c0_ != quote) return Token::ILLEGAL;

	575 literal.Complete();

	576

	577 Advance(); // consume quote

	578 return Token::STRING;

	579 }

	580

	581

	582 void JavaScriptScanner::ScanDecimalDigits() {

	583 while (IsDecimalDigit(c0_))

	584 AddLiteralCharAdvance();

	585 }

	586

	587

	588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {

	589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

	590

	591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

	592

	593 LiteralScope literal(this);

	594 if (seen_period) {

	595 // we have already seen a decimal point of the float

	596 AddLiteralChar('.');

	597 ScanDecimalDigits(); // we know we have at least one digit

	598

	599 } else {

	600 // if the first character is '0' we must check for octals and hex

	601 if (c0_ == '0') {

	602 AddLiteralCharAdvance();

	603

	604 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number

	605 if (c0_ == 'x' \|\| c0_ == 'X') {

	606 // hex number

	607 kind = HEX;

	608 AddLiteralCharAdvance();

	609 if (!IsHexDigit(c0_)) {

	610 // we must have at least one hex digit after 'x'/'X'

	611 return Token::ILLEGAL;

	612 }

	613 while (IsHexDigit(c0_)) {

	614 AddLiteralCharAdvance();

	615 }

	616 } else if ('0' <= c0_ && c0_ <= '7') {

	617 // (possible) octal number

	618 kind = OCTAL;

	619 while (true) {

	620 if (c0_ == '8' \|\| c0_ == '9') {

	621 kind = DECIMAL;

	622 break;

	623 }

	624 if (c0_ < '0' \|\| '7' < c0_) break;

	625 AddLiteralCharAdvance();

	626 }

	627 }

	628 }

	629

	630 // Parse decimal digits and allow trailing fractional part.

	631 if (kind == DECIMAL) {

	632 ScanDecimalDigits(); // optional

	633 if (c0_ == '.') {

	634 AddLiteralCharAdvance();

	635 ScanDecimalDigits(); // optional

	636 }

	637 }

	638 }

	639

	640 // scan exponent, if any

	641 if (c0_ == 'e' \|\| c0_ == 'E') {

	642 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number

	643 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed

	644 // scan exponent

	645 AddLiteralCharAdvance();

	646 if (c0_ == '+' \|\| c0_ == '-')

	647 AddLiteralCharAdvance();

	648 if (!IsDecimalDigit(c0_)) {

	649 // we must have at least one decimal digit after 'e'/'E'

	650 return Token::ILLEGAL;

	651 }

	652 ScanDecimalDigits();

	653 }

	654

	655 // The source character immediately following a numeric literal must

	656 // not be an identifier start or a decimal digit; see ECMA-262

	657 // section 7.8.3, page 17 (note that we read only one decimal digit

	658 // if the value is 0).

	659 if (IsDecimalDigit(c0_) \|\| ScannerConstants::kIsIdentifierStart.get(c0_))

	660 return Token::ILLEGAL;

	661

	662 literal.Complete();

	663

	664 return Token::NUMBER;

	665 }

	666

	667

	668 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {

	669 Advance();

	670 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

	671 Advance();

	672 uc32 c = ScanHexEscape('u', 4);

	673 // We do not allow a unicode escape sequence to start another

	674 // unicode escape sequence.

	675 if (c == '\\') return unibrow::Utf8::kBadChar;

	676 return c;

	677 }

	678

	679

	680 Token::Value JavaScriptScanner::ScanIdentifier() {

	681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));

	682

	683 LiteralScope literal(this);

	684 KeywordMatcher keyword_match;

	685

	686 // Scan identifier start character.

	687 if (c0_ == '\\') {

	688 uc32 c = ScanIdentifierUnicodeEscape();

	689 // Only allow legal identifier start characters.

	690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;

	691 AddLiteralChar(c);

	692 keyword_match.Fail();

	693 } else {

	694 AddLiteralChar(c0_);

	695 keyword_match.AddChar(c0_);

	696 Advance();

	697 }

	698

	699 // Scan the rest of the identifier characters.

	700 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

	701 if (c0_ == '\\') {

	702 uc32 c = ScanIdentifierUnicodeEscape();

	703 // Only allow legal identifier part characters.

	704 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;

	705 AddLiteralChar(c);

	706 keyword_match.Fail();

	707 } else {

	708 AddLiteralChar(c0_);

	709 keyword_match.AddChar(c0_);

	710 Advance();

	711 }

	712 }

	713 literal.Complete();

	714

	715 return keyword_match.token();

	716 }

	717

	718

	719

	720 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {

	721 // Scan: ('/' \| '/=') RegularExpressionBody '/' RegularExpressionFlags

	722 bool in_character_class = false;

	723

	724 // Previous token is either '/' or '/=', in the second case, the

	725 // pattern starts at =.

	726 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

	727 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

	728

	729 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

	730 // the scanner should pass uninterpreted bodies to the RegExp

	731 // constructor.

	732 LiteralScope literal(this);

	733 if (seen_equal)

	734 AddLiteralChar('=');

	735

	736 while (c0_ != '/' \|\| in_character_class) {

	737 if (ScannerConstants::kIsLineTerminator.get(c0_) \|\| c0_ < 0) return false;

	738 if (c0_ == '\\') { // escaped character

	739 AddLiteralCharAdvance();

	740 if (ScannerConstants::kIsLineTerminator.get(c0_) \|\| c0_ < 0) return false;

	741 AddLiteralCharAdvance();

	742 } else { // unescaped character

	743 if (c0_ == '[') in_character_class = true;

	744 if (c0_ == ']') in_character_class = false;

	745 AddLiteralCharAdvance();

	746 }

	747 }

	748 Advance(); // consume '/'

	749

	750 literal.Complete();

	751

	752 return true;

	753 }

	754

	755 bool JavaScriptScanner::ScanRegExpFlags() {

	756 // Scan regular expression flags.

	757 LiteralScope literal(this);

	758 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

	759 if (c0_ == '\\') {

	760 uc32 c = ScanIdentifierUnicodeEscape();

	761 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

	762 // We allow any escaped character, unlike the restriction on

	763 // IdentifierPart when it is used to build an IdentifierName.

	764 AddLiteralChar(c);

	765 continue;

	766 }

	767 }

	768 AddLiteralCharAdvance();

	769 }

	770 literal.Complete();

	771

	772 next_.location.end_pos = source_pos() - 1;

	773 return true;

	774 }

	775

	776 // ----------------------------------------------------------------------------

64 // Keyword Matcher	777 // Keyword Matcher

65	778

66 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {	779 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {

67 { "break", KEYWORD_PREFIX, Token::BREAK },	780 { "break", KEYWORD_PREFIX, Token::BREAK },

68 { NULL, C, Token::ILLEGAL },	781 { NULL, C, Token::ILLEGAL },

69 { NULL, D, Token::ILLEGAL },	782 { NULL, D, Token::ILLEGAL },

70 { "else", KEYWORD_PREFIX, Token::ELSE },	783 { "else", KEYWORD_PREFIX, Token::ELSE },

71 { NULL, F, Token::ILLEGAL },	784 { NULL, F, Token::ILLEGAL },

72 { NULL, UNMATCHABLE, Token::ILLEGAL },	785 { NULL, UNMATCHABLE, Token::ILLEGAL },

73 { NULL, UNMATCHABLE, Token::ILLEGAL },	786 { NULL, UNMATCHABLE, Token::ILLEGAL },

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
186 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;	899 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;

187 break;	900 break;

188 case UNMATCHABLE:	901 case UNMATCHABLE:

189 break;	902 break;

190 }	903 }

191 // On fallthrough, it's a failure.	904 // On fallthrough, it's a failure.

192 state_ = UNMATCHABLE;	905 state_ = UNMATCHABLE;

193 }	906 }

194	907

195 } } // namespace v8::internal	908 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner-base.h ('k') | test/cctest/test-parsing.cc » ('j') | no next file with comments »