src/prescanner.h - Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base.

Side by Side Diff: src/prescanner.h

Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base. (Closed)

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 22 matching lines...) Expand all Loading...
33 #include "utils.h"	33 #include "utils.h"

34 #include "scanner-base.h"	34 #include "scanner-base.h"

35	35

36 namespace v8 {	36 namespace v8 {

37 namespace preparser {	37 namespace preparser {

38	38

39 namespace i = v8::internal;	39 namespace i = v8::internal;

40	40

41 typedef int uc32;	41 typedef int uc32;

42	42

43 int HexValue(uc32 c) {

44 int res = c | 0x20; // Fold uppercase letters to lowercase.

45 int is_digit = (c & 0x10) >> 4; // 0 if non-digit, 1 if digit.

46 // What to add to digits to make them consecutive with 'a'-'f' letters.

47 int kDelta = 'a' - '9' - 1;

48 // What to subtract from digits and letters to get them back to the range 0..15.

49 int kStart = '0' + kDelta;

50 res -= kStart;

51 res += kDelta * is_digit;

52 return res;

53 }

54

55

56 class PreScannerStackGuard {	43 class PreScannerStackGuard {

57 public:	44 public:

58 explicit PreScannerStackGuard(int max_size)	45 explicit PreScannerStackGuard(int max_size)

59 : limit_(StackPoint().at() - max_size) { }	46 : limit_(StackPoint().at() - max_size) { }

60 bool has_overflowed() {	47 bool has_overflowed() {

61 return StackPoint().at() < limit_;	48 return StackPoint().at() < limit_;

62 }	49 }

63 private:	50 private:

64 class StackPoint {	51 class StackPoint {

65 public:	52 public:

66 char* at() { return reinterpret_cast<char*>(this); }	53 char* at() { return reinterpret_cast<char*>(this); }

67 };	54 };

68 char* limit_;	55 char* limit_;

69 };	56 };

70	57

71	58

72 // Scanner for preparsing.	59 // Scanner for preparsing.

73 // InputStream is a source of UC16 characters with limited push-back.	60 // InputStream is a source of UC16 characters with limited push-back.

74 // LiteralsBuffer is a collector of (UTF-8) characters used to capture literals.	61 // LiteralsBuffer is a collector of (UTF-8) characters used to capture literals.

75 template <typename InputStream, typename LiteralsBuffer>

76 class Scanner {	62 class Scanner {

77 public:	63 public:

78 enum LiteralType {	64 enum LiteralType {

79 kLiteralNumber,	65 kLiteralNumber,

80 kLiteralIdentifier,	66 kLiteralIdentifier,

81 kLiteralString,	67 kLiteralString,

82 kLiteralRegExp,	68 kLiteralRegExp,

83 kLiteralRegExpFlags	69 kLiteralRegExpFlags

84 };	70 };

85	71

86 class LiteralScope {	72 class LiteralScope {

87 public:	73 public:

88 explicit LiteralScope(Scanner* self, LiteralType type);	74 explicit LiteralScope(Scanner* self, LiteralType type);

89 ~LiteralScope();	75 ~LiteralScope();

90 void Complete();	76 void Complete();

91	77

92 private:	78 private:

93 Scanner* scanner_;	79 Scanner* scanner_;

94 bool complete_;	80 bool complete_;

95 };	81 };

96	82

97 Scanner();	83 Scanner();

98	84

99 void Initialize(InputStream* stream);	85 void Initialize(i::UTF16Buffer* stream);

100	86

101 // Returns the next token.	87 // Returns the next token.

102 i::Token::Value Next();	88 i::Token::Value Next();

103	89

104 // Returns the current token again.	90 // Returns the current token again.

105 i::Token::Value current_token() { return current_.token; }	91 i::Token::Value current_token() { return current_.token; }

106	92

107 // One token look-ahead (past the token returned by Next()).	93 // One token look-ahead (past the token returned by Next()).

108 i::Token::Value peek() const { return next_.token; }	94 i::Token::Value peek() const { return next_.token; }

109	95

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
144 i::Vector<const char> literal() const {	130 i::Vector<const char> literal() const {

145 return i::Vector<const char>(literal_string(), literal_length());	131 return i::Vector<const char>(literal_string(), literal_length());

146 }	132 }

147	133

148 // Returns the literal string for the next token (the token that	134 // Returns the literal string for the next token (the token that

149 // would be returned if Next() were called).	135 // would be returned if Next() were called).

150 const char* next_literal_string() const {	136 const char* next_literal_string() const {

151 return next_.literal_chars;	137 return next_.literal_chars;

152 }	138 }

153	139

154

155 // Returns the length of the next token (that would be returned if	140 // Returns the length of the next token (that would be returned if

156 // Next() were called).	141 // Next() were called).

157 int next_literal_length() const {	142 int next_literal_length() const {

158 // Excluding terminal '\x00' added by TerminateLiteral().	143 // Excluding terminal '\x00' added by TerminateLiteral().

159 return next_.literal_length - 1;	144 return next_.literal_length - 1;

160 }	145 }

161	146

162 i::Vector<const char> next_literal() const {	147 i::Vector<const char> next_literal() const {

163 return i::Vector<const char>(next_literal_string(), next_literal_length());	148 return i::Vector<const char>(next_literal_string(), next_literal_length());

164 }	149 }

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
243 // If the escape sequence cannot be decoded the result is kBadRune.	228 // If the escape sequence cannot be decoded the result is kBadRune.

244 uc32 ScanIdentifierUnicodeEscape();	229 uc32 ScanIdentifierUnicodeEscape();

245	230

246 PreScannerStackGuard stack_guard_;	231 PreScannerStackGuard stack_guard_;

247	232

248 TokenDesc current_; // desc for current token (as returned by Next())	233 TokenDesc current_; // desc for current token (as returned by Next())

249 TokenDesc next_; // desc for next token (one token look-ahead)	234 TokenDesc next_; // desc for next token (one token look-ahead)

250 bool has_line_terminator_before_next_;	235 bool has_line_terminator_before_next_;

251	236

252 // Source.	237 // Source.

253 InputStream* source_;	238 i::UTF16Buffer* source_;

254	239

255 // Buffer to hold literal values (identifiers, strings, numerals, regexps and	240 // Buffer to hold literal values (identifiers, strings, numerals, regexps and

256 // regexp flags) using '\x00'-terminated UTF-8 encoding.	241 // regexp flags) using '\x00'-terminated UTF-8 encoding.

257 // Handles allocation internally.	242 // Handles allocation internally.

258 // Notice that the '\x00' termination is meaningless for strings and regexps	243 // Notice that the '\x00' termination is meaningless for strings and regexps

259 // which may contain the zero-character, but can be used as terminator for	244 // which may contain the zero-character, but can be used as terminator for

260 // identifiers, numerals and regexp flags.	245 // identifiers, numerals and regexp flags.

261 LiteralsBuffer literal_buffer_;	246 i::LiteralCollector literal_buffer_;

262	247

263 bool stack_overflow_;	248 bool stack_overflow_;

264	249

265 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	250 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

266 uc32 c0_;	251 uc32 c0_;

267 };	252 };

268	253

269	254

270 // ----------------------------------------------------------------------------	255 // ----------------------------------------------------------------------------

271 // Scanner::LiteralScope	256 // Scanner::LiteralScope

272	257

273 template <typename InputStream, typename LiteralsBuffer>	258 Scanner::LiteralScope::LiteralScope(

274 Scanner<InputStream, LiteralsBuffer>::LiteralScope::LiteralScope(

275 Scanner* self, LiteralType type)	259 Scanner* self, LiteralType type)

276 : scanner_(self), complete_(false) {	260 : scanner_(self), complete_(false) {

277 self->StartLiteral(type);	261 self->StartLiteral(type);

278 }	262 }

279	263

280	264

281 template <typename InputStream, typename LiteralsBuffer>	265 Scanner::LiteralScope::~LiteralScope() {

282 Scanner<InputStream, LiteralsBuffer>::LiteralScope::~LiteralScope() {

283 if (!complete_) scanner_->DropLiteral();	266 if (!complete_) scanner_->DropLiteral();

284 }	267 }

285	268

286 template <typename InputStream, typename LiteralsBuffer>	269 void Scanner::LiteralScope::Complete() {

287 void Scanner<InputStream, LiteralsBuffer>::LiteralScope::Complete() {

288 scanner_->TerminateLiteral();	270 scanner_->TerminateLiteral();

289 complete_ = true;	271 complete_ = true;

290 }	272 }

291	273

292	274

293 // ----------------------------------------------------------------------------	275 // ----------------------------------------------------------------------------

294 // Scanner.	276 // Scanner.

295 template <typename InputStream, typename LiteralsBuffer>	277 Scanner::Scanner()

296 Scanner<InputStream, LiteralsBuffer>::Scanner()

297 : stack_guard_(kMaxStackSize),	278 : stack_guard_(kMaxStackSize),

298 has_line_terminator_before_next_(false),	279 has_line_terminator_before_next_(false),

299 source_(NULL),	280 source_(NULL),

300 stack_overflow_(false) {}	281 stack_overflow_(false) {}

301	282

302	283

303 template <typename InputStream, typename LiteralsBuffer>	284 void Scanner::Initialize(i::UTF16Buffer* stream) {

304 void Scanner<InputStream, LiteralsBuffer>::Initialize(InputStream* stream) {

305 source_ = stream;	285 source_ = stream;

306	286

307 // Initialize current_ to not refer to a literal.	287 // Initialize current_ to not refer to a literal.

308 current_.literal_length = 0;	288 current_.literal_length = 0;

309 // Reset literal buffer.	289 // Reset literal buffer.

310 literal_buffer_.Reset();	290 literal_buffer_.Reset();

311	291

312 // Set c0_ (one character ahead)	292 // Set c0_ (one character ahead)

313 ASSERT(kCharacterLookaheadBufferSize == 1);	293 ASSERT(kCharacterLookaheadBufferSize == 1);

314 Advance();	294 Advance();

315	295

316 // Skip initial whitespace allowing HTML comment ends just like	296 // Skip initial whitespace allowing HTML comment ends just like

317 // after a newline and scan first token.	297 // after a newline and scan first token.

318 has_line_terminator_before_next_ = true;	298 has_line_terminator_before_next_ = true;

319 SkipWhiteSpace();	299 SkipWhiteSpace();

320 Scan();	300 Scan();

321 }	301 }

322	302

323	303

324 template <typename InputStream, typename LiteralsBuffer>	304 i::Token::Value Scanner::Next() {

325 i::Token::Value Scanner<InputStream, LiteralsBuffer>::Next() {

326 // BUG 1215673: Find a thread safe way to set a stack limit in	305 // BUG 1215673: Find a thread safe way to set a stack limit in

327 // pre-parse mode. Otherwise, we cannot safely pre-parse from other	306 // pre-parse mode. Otherwise, we cannot safely pre-parse from other

328 // threads.	307 // threads.

329 current_ = next_;	308 current_ = next_;

330 // Check for stack-overflow before returning any tokens.	309 // Check for stack-overflow before returning any tokens.

331 if (stack_guard_.has_overflowed()) {	310 if (stack_guard_.has_overflowed()) {

332 stack_overflow_ = true;	311 stack_overflow_ = true;

333 next_.token = i::Token::ILLEGAL;	312 next_.token = i::Token::ILLEGAL;

334 } else {	313 } else {

335 has_line_terminator_before_next_ = false;	314 has_line_terminator_before_next_ = false;

336 Scan();	315 Scan();

337 }	316 }

338 return current_.token;	317 return current_.token;

339 }	318 }

340	319

341	320

342 template <typename InputStream, typename LiteralsBuffer>	321 void Scanner::StartLiteral(LiteralType type) {

343 void Scanner<InputStream, LiteralsBuffer>::StartLiteral(LiteralType type) {

344 // Only record string and identifier literals when preparsing.	322 // Only record string and identifier literals when preparsing.

345 // Those are the ones that are recorded as symbols. Numbers and	323 // Those are the ones that are recorded as symbols. Numbers and

346 // regexps are not recorded.	324 // regexps are not recorded.

347 if (type == kLiteralString || type == kLiteralIdentifier) {	325 if (type == kLiteralString || type == kLiteralIdentifier) {

348 literal_buffer_.StartLiteral();	326 literal_buffer_.StartLiteral();

349 }	327 }

350 }	328 }

351	329

352	330

353 template <typename InputStream, typename LiteralsBuffer>	331 void Scanner::AddLiteralChar(uc32 c) {

354 void Scanner<InputStream, LiteralsBuffer>::AddLiteralChar(uc32 c) {

355 literal_buffer_.AddChar(c);	332 literal_buffer_.AddChar(c);

356 }	333 }

357	334

358	335

359 template <typename InputStream, typename LiteralsBuffer>	336 void Scanner::TerminateLiteral() {

360 void Scanner<InputStream, LiteralsBuffer>::TerminateLiteral() {

361 i::Vector<const char> chars = literal_buffer_.EndLiteral();	337 i::Vector<const char> chars = literal_buffer_.EndLiteral();

362 next_.literal_chars = chars.start();	338 next_.literal_chars = chars.start();

363 next_.literal_length = chars.length();	339 next_.literal_length = chars.length();

364 }	340 }

365	341

366	342

367 template <typename InputStream, typename LiteralsBuffer>	343 void Scanner::DropLiteral() {

368 void Scanner<InputStream, LiteralsBuffer>::DropLiteral() {

369 literal_buffer_.DropLiteral();	344 literal_buffer_.DropLiteral();

370 }	345 }

371	346

372	347

373 template <typename InputStream, typename LiteralsBuffer>	348 void Scanner::AddLiteralCharAdvance() {

374 void Scanner<InputStream, LiteralsBuffer>::AddLiteralCharAdvance() {

375 AddLiteralChar(c0_);	349 AddLiteralChar(c0_);

376 Advance();	350 Advance();

377 }	351 }

378	352

379	353

380 static inline bool IsByteOrderMark(uc32 c) {	354 static inline bool IsByteOrderMark(uc32 c) {

381 // The Unicode value U+FFFE is guaranteed never to be assigned as a	355 // The Unicode value U+FFFE is guaranteed never to be assigned as a

382 // Unicode character; this implies that in a Unicode context the	356 // Unicode character; this implies that in a Unicode context the

383 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	357 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

384 // character expressed in little-endian byte order (since it could	358 // character expressed in little-endian byte order (since it could

385 // not be a U+FFFE character expressed in big-endian byte	359 // not be a U+FFFE character expressed in big-endian byte

386 // order). Nevertheless, we check for it to be compatible with	360 // order). Nevertheless, we check for it to be compatible with

387 // Spidermonkey.	361 // Spidermonkey.

388 return c == 0xFEFF || c == 0xFFFE;	362 return c == 0xFEFF || c == 0xFFFE;

389 }	363 }

390	364

391	365

392 template <typename InputStream, typename LiteralsBuffer>	366 bool Scanner::SkipWhiteSpace() {

393 bool Scanner<InputStream, LiteralsBuffer>::SkipWhiteSpace() {

394 int start_position = source_pos();	367 int start_position = source_pos();

395	368

396 while (true) {	369 while (true) {

397 // We treat byte-order marks (BOMs) as whitespace for better	370 // We treat byte-order marks (BOMs) as whitespace for better

398 // compatibility with Spidermonkey and other JavaScript engines.	371 // compatibility with Spidermonkey and other JavaScript engines.

399 while (i::ScannerConstants::kIsWhiteSpace.get(c0_)	372 while (i::ScannerConstants::kIsWhiteSpace.get(c0_)

400 || IsByteOrderMark(c0_)) {	373 || IsByteOrderMark(c0_)) {

401 // IsWhiteSpace() includes line terminators!	374 // IsWhiteSpace() includes line terminators!

402 if (i::ScannerConstants::kIsLineTerminator.get(c0_)) {	375 if (i::ScannerConstants::kIsLineTerminator.get(c0_)) {

403 // Ignore line terminators, but remember them. This is necessary	376 // Ignore line terminators, but remember them. This is necessary

(...skipping 20 matching lines...) Expand all Loading...
424 PushBack('-'); // undo Advance()	397 PushBack('-'); // undo Advance()

425 }	398 }

426 PushBack('-'); // undo Advance()	399 PushBack('-'); // undo Advance()

427 }	400 }

428 // Return whether or not we skipped any characters.	401 // Return whether or not we skipped any characters.

429 return source_pos() != start_position;	402 return source_pos() != start_position;

430 }	403 }

431 }	404 }

432	405

433	406

434 template <typename InputStream, typename LiteralsBuffer>	407 i::Token::Value Scanner::SkipSingleLineComment() {

435 i::Token::Value Scanner<InputStream, LiteralsBuffer>::SkipSingleLineComment() {

436 Advance();	408 Advance();

437	409

438 // The line terminator at the end of the line is not considered	410 // The line terminator at the end of the line is not considered

439 // to be part of the single-line comment; it is recognized	411 // to be part of the single-line comment; it is recognized

440 // separately by the lexical grammar and becomes part of the	412 // separately by the lexical grammar and becomes part of the

441 // stream of input elements for the syntactic grammar (see	413 // stream of input elements for the syntactic grammar (see

442 // ECMA-262, section 7.4, page 12).	414 // ECMA-262, section 7.4, page 12).

443 while (c0_ >= 0 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) {	415 while (c0_ >= 0 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) {

444 Advance();	416 Advance();

445 }	417 }

446	418

447 return i::Token::WHITESPACE;	419 return i::Token::WHITESPACE;

448 }	420 }

449	421

450	422

451 template <typename InputStream, typename LiteralsBuffer>	423 i::Token::Value Scanner::SkipMultiLineComment() {

452 i::Token::Value Scanner<InputStream, LiteralsBuffer>::SkipMultiLineComment() {

453 ASSERT(c0_ == '*');	424 ASSERT(c0_ == '*');

454 Advance();	425 Advance();

455	426

456 while (c0_ >= 0) {	427 while (c0_ >= 0) {

457 char ch = c0_;	428 char ch = c0_;

458 Advance();	429 Advance();

459 // If we have reached the end of the multi-line comment, we	430 // If we have reached the end of the multi-line comment, we

460 // consume the '/' and insert a whitespace. This way all	431 // consume the '/' and insert a whitespace. This way all

461 // multi-line comments are treated as whitespace - even the ones	432 // multi-line comments are treated as whitespace - even the ones

462 // containing line terminators. This contradicts ECMA-262, section	433 // containing line terminators. This contradicts ECMA-262, section

463 // 7.4, page 12, that says that multi-line comments containing	434 // 7.4, page 12, that says that multi-line comments containing

464 // line terminators should be treated as a line terminator, but it	435 // line terminators should be treated as a line terminator, but it

465 // matches the behaviour of SpiderMonkey and KJS.	436 // matches the behaviour of SpiderMonkey and KJS.

466 if (ch == '*' && c0_ == '/') {	437 if (ch == '*' && c0_ == '/') {

467 c0_ = ' ';	438 c0_ = ' ';

468 return i::Token::WHITESPACE;	439 return i::Token::WHITESPACE;

469 }	440 }

470 }	441 }

471	442

472 // Unterminated multi-line comment.	443 // Unterminated multi-line comment.

473 return i::Token::ILLEGAL;	444 return i::Token::ILLEGAL;

474 }	445 }

475	446

476	447

477 template <typename InputStream, typename LiteralsBuffer>	448 i::Token::Value Scanner::ScanHtmlComment() {

478 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanHtmlComment() {

479 // Check for <!-- comments.	449 // Check for <!-- comments.

480 ASSERT(c0_ == '!');	450 ASSERT(c0_ == '!');

481 Advance();	451 Advance();

482 if (c0_ == '-') {	452 if (c0_ == '-') {

483 Advance();	453 Advance();

484 if (c0_ == '-') return SkipSingleLineComment();	454 if (c0_ == '-') return SkipSingleLineComment();

485 PushBack('-'); // undo Advance()	455 PushBack('-'); // undo Advance()

486 }	456 }

487 PushBack('!'); // undo Advance()	457 PushBack('!'); // undo Advance()

488 ASSERT(c0_ == '!');	458 ASSERT(c0_ == '!');

489 return i::Token::LT;	459 return i::Token::LT;

490 }	460 }

491	461

492	462

493 template <typename InputStream, typename LiteralsBuffer>	463 void Scanner::Scan() {

494 void Scanner<InputStream, LiteralsBuffer>::Scan() {

495 next_.literal_length = 0;	464 next_.literal_length = 0;

496 i::Token::Value token;	465 i::Token::Value token;

497 do {	466 do {

498 // Remember the position of the next token	467 // Remember the position of the next token

499 next_.location.beg_pos = source_pos();	468 next_.location.beg_pos = source_pos();

500	469

501 switch (c0_) {	470 switch (c0_) {

502 case ' ':	471 case ' ':

503 case '\t':	472 case '\t':

504 Advance();	473 Advance();

(...skipping 219 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
724	693

725 // Continue scanning for tokens as long as we're just skipping	694 // Continue scanning for tokens as long as we're just skipping

726 // whitespace.	695 // whitespace.

727 } while (token == i::Token::WHITESPACE);	696 } while (token == i::Token::WHITESPACE);

728	697

729 next_.location.end_pos = source_pos();	698 next_.location.end_pos = source_pos();

730 next_.token = token;	699 next_.token = token;

731 }	700 }

732	701

733	702

734 template <typename InputStream, typename LiteralsBuffer>	703 void Scanner::SeekForward(int pos) {

735 void Scanner<InputStream, LiteralsBuffer>::SeekForward(int pos) {

736 source_->SeekForward(pos - 1);	704 source_->SeekForward(pos - 1);

737 Advance();	705 Advance();

738 // This function is only called to seek to the location	706 // This function is only called to seek to the location

739 // of the end of a function (at the "}" token). It doesn't matter	707 // of the end of a function (at the "}" token). It doesn't matter

740 // whether there was a line terminator in the part we skip.	708 // whether there was a line terminator in the part we skip.

741 has_line_terminator_before_next_ = false;	709 has_line_terminator_before_next_ = false;

742 Scan();	710 Scan();

743 }	711 }

744	712

745	713

746 template <typename InputStream, typename LiteralsBuffer>	714 uc32 Scanner::ScanHexEscape(uc32 c, int length) {

747 uc32 Scanner<InputStream, LiteralsBuffer>::ScanHexEscape(uc32 c, int length) {

748 ASSERT(length <= 4); // prevent overflow	715 ASSERT(length <= 4); // prevent overflow

749	716

750 uc32 digits[4];	717 uc32 digits[4];

751 uc32 x = 0;	718 uc32 x = 0;

752 for (int i = 0; i < length; i++) {	719 for (int i = 0; i < length; i++) {

753 digits[i] = c0_;	720 digits[i] = c0_;

754 int d = HexValue(c0_);	721 int d = i::HexValue(c0_);

755 if (d < 0) {	722 if (d < 0) {

756 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes	723 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes

757 // should be illegal, but other JS VMs just return the	724 // should be illegal, but other JS VMs just return the

758 // non-escaped version of the original character.	725 // non-escaped version of the original character.

759	726

760 // Push back digits read, except the last one (in c0_).	727 // Push back digits read, except the last one (in c0_).

761 for (int j = i-1; j >= 0; j--) {	728 for (int j = i-1; j >= 0; j--) {

762 PushBack(digits[j]);	729 PushBack(digits[j]);

763 }	730 }

764 // Notice: No handling of error - treat it as "\u"->"u".	731 // Notice: No handling of error - treat it as "\u"->"u".

765 return c;	732 return c;

766 }	733 }

767 x = x * 16 + d;	734 x = x * 16 + d;

768 Advance();	735 Advance();

769 }	736 }

770	737

771 return x;	738 return x;

772 }	739 }

773	740

774	741

775 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	742 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

776 // ECMA-262. Other JS VMs support them.	743 // ECMA-262. Other JS VMs support them.

777 template <typename InputStream, typename LiteralsBuffer>	744 uc32 Scanner::ScanOctalEscape(

778 uc32 Scanner<InputStream, LiteralsBuffer>::ScanOctalEscape(

779 uc32 c, int length) {	745 uc32 c, int length) {

780 uc32 x = c - '0';	746 uc32 x = c - '0';

781 for (int i = 0; i < length; i++) {	747 for (int i = 0; i < length; i++) {

782 int d = c0_ - '0';	748 int d = c0_ - '0';

783 if (d < 0 || d > 7) break;	749 if (d < 0 || d > 7) break;

784 int nx = x * 8 + d;	750 int nx = x * 8 + d;

785 if (nx >= 256) break;	751 if (nx >= 256) break;

786 x = nx;	752 x = nx;

787 Advance();	753 Advance();

788 }	754 }

789 return x;	755 return x;

790 }	756 }

791	757

792	758

793 template <typename InputStream, typename LiteralsBuffer>	759 void Scanner::ScanEscape() {

794 void Scanner<InputStream, LiteralsBuffer>::ScanEscape() {

795 uc32 c = c0_;	760 uc32 c = c0_;

796 Advance();	761 Advance();

797	762

798 // Skip escaped newlines.	763 // Skip escaped newlines.

799 if (i::ScannerConstants::kIsLineTerminator.get(c)) {	764 if (i::ScannerConstants::kIsLineTerminator.get(c)) {

800 // Allow CR+LF newlines in multiline string literals.	765 // Allow CR+LF newlines in multiline string literals.

801 if (i::IsCarriageReturn(c) && i::IsLineFeed(c0_)) Advance();	766 if (i::IsCarriageReturn(c) && i::IsLineFeed(c0_)) Advance();

802 // Allow LF+CR newlines in multiline string literals.	767 // Allow LF+CR newlines in multiline string literals.

803 if (i::IsLineFeed(c) && i::IsCarriageReturn(c0_)) Advance();	768 if (i::IsLineFeed(c) && i::IsCarriageReturn(c0_)) Advance();

804 return;	769 return;

(...skipping 21 matching lines...) Expand all Loading...
826 case '7' : c = ScanOctalEscape(c, 2); break;	791 case '7' : c = ScanOctalEscape(c, 2); break;

827 }	792 }

828	793

829 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these	794 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these

830 // should be illegal, but they are commonly handled	795 // should be illegal, but they are commonly handled

831 // as non-escaped characters by JS VMs.	796 // as non-escaped characters by JS VMs.

832 AddLiteralChar(c);	797 AddLiteralChar(c);

833 }	798 }

834	799

835	800

836 template <typename InputStream, typename LiteralsBuffer>	801 i::Token::Value Scanner::ScanString() {

837 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanString() {

838 uc32 quote = c0_;	802 uc32 quote = c0_;

839 Advance(); // consume quote	803 Advance(); // consume quote

840	804

841 LiteralScope literal(this, kLiteralString);	805 LiteralScope literal(this, kLiteralString);

842 while (c0_ != quote && c0_ >= 0	806 while (c0_ != quote && c0_ >= 0

843 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) {	807 && !i::ScannerConstants::kIsLineTerminator.get(c0_)) {

844 uc32 c = c0_;	808 uc32 c = c0_;

845 Advance();	809 Advance();

846 if (c == '\\') {	810 if (c == '\\') {

847 if (c0_ < 0) return i::Token::ILLEGAL;	811 if (c0_ < 0) return i::Token::ILLEGAL;

848 ScanEscape();	812 ScanEscape();

849 } else {	813 } else {

850 AddLiteralChar(c);	814 AddLiteralChar(c);

851 }	815 }

852 }	816 }

853 if (c0_ != quote) return i::Token::ILLEGAL;	817 if (c0_ != quote) return i::Token::ILLEGAL;

854 literal.Complete();	818 literal.Complete();

855	819

856 Advance(); // consume quote	820 Advance(); // consume quote

857 return i::Token::STRING;	821 return i::Token::STRING;

858 }	822 }

859	823

860	824

861 template <typename InputStream, typename LiteralsBuffer>	825 i::Token::Value Scanner::Select(

862 i::Token::Value Scanner<InputStream, LiteralsBuffer>::Select(

863 i::Token::Value tok) {	826 i::Token::Value tok) {

864 Advance();	827 Advance();

865 return tok;	828 return tok;

866 }	829 }

867	830

868	831

869 template <typename InputStream, typename LiteralsBuffer>	832 i::Token::Value Scanner::Select(

870 i::Token::Value Scanner<InputStream, LiteralsBuffer>::Select(

871 uc32 next,	833 uc32 next,

872 i::Token::Value then,	834 i::Token::Value then,

873 i::Token::Value else_) {	835 i::Token::Value else_) {

874 Advance();	836 Advance();

875 if (c0_ == next) {	837 if (c0_ == next) {

876 Advance();	838 Advance();

877 return then;	839 return then;

878 } else {	840 } else {

879 return else_;	841 return else_;

880 }	842 }

881 }	843 }

882	844

883	845

884 // Scans consecutive decimal digits, adding each one to the current literal.	846 // Scans consecutive decimal digits, adding each one to the current literal.

885 template <typename InputStream, typename LiteralsBuffer>	847 void Scanner::ScanDecimalDigits() {

886 void Scanner<InputStream, LiteralsBuffer>::ScanDecimalDigits() {

887 while (i::IsDecimalDigit(c0_))	848 while (i::IsDecimalDigit(c0_))

888 AddLiteralCharAdvance();	849 AddLiteralCharAdvance();

889 }	850 }

890	851

891	852

892 template <typename InputStream, typename LiteralsBuffer>	853 i::Token::Value Scanner::ScanNumber(

893 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanNumber(

894 bool seen_period) {	854 bool seen_period) {

895 // c0_ is the first digit of the number or the fraction.	855 // c0_ is the first digit of the number or the fraction.

896 ASSERT(i::IsDecimalDigit(c0_));	856 ASSERT(i::IsDecimalDigit(c0_));

897	857

898 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;	858 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

899	859

900 LiteralScope literal(this, kLiteralNumber);	860 LiteralScope literal(this, kLiteralNumber);

901 if (seen_period) {	861 if (seen_period) {

902 // we have already seen a decimal point of the float	862 // we have already seen a decimal point of the float

903 AddLiteralChar('.');	863 AddLiteralChar('.');

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
966 if (i::IsDecimalDigit(c0_)	926 if (i::IsDecimalDigit(c0_)

967 || i::ScannerConstants::kIsIdentifierStart.get(c0_))	927 || i::ScannerConstants::kIsIdentifierStart.get(c0_))

968 return i::Token::ILLEGAL;	928 return i::Token::ILLEGAL;

969	929

970 literal.Complete();	930 literal.Complete();

971	931

972 return i::Token::NUMBER;	932 return i::Token::NUMBER;

973 }	933 }

974	934

975	935

976 template <typename InputStream, typename LiteralsBuffer>	936 uc32 Scanner::ScanIdentifierUnicodeEscape() {

977 uc32 Scanner<InputStream, LiteralsBuffer>::ScanIdentifierUnicodeEscape() {

978 Advance();	937 Advance();

979 if (c0_ != 'u') return unibrow::Utf8::kBadChar;	938 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

980 Advance();	939 Advance();

981 uc32 c = ScanHexEscape('u', 4);	940 uc32 c = ScanHexEscape('u', 4);

982 // We do not allow a unicode escape sequence to start another	941 // We do not allow a unicode escape sequence to start another

983 // unicode escape sequence.	942 // unicode escape sequence.

984 if (c == '\\') return unibrow::Utf8::kBadChar;	943 if (c == '\\') return unibrow::Utf8::kBadChar;

985 return c;	944 return c;

986 }	945 }

987	946

988	947

989 template <typename InputStream, typename LiteralsBuffer>	948 i::Token::Value Scanner::ScanIdentifier() {

990 i::Token::Value Scanner<InputStream, LiteralsBuffer>::ScanIdentifier() {

991 ASSERT(i::ScannerConstants::kIsIdentifierStart.get(c0_));	949 ASSERT(i::ScannerConstants::kIsIdentifierStart.get(c0_));

992	950

993 LiteralScope literal(this, kLiteralIdentifier);	951 LiteralScope literal(this, kLiteralIdentifier);

994 i::KeywordMatcher keyword_match;	952 i::KeywordMatcher keyword_match;

995	953

996 // Scan identifier start character.	954 // Scan identifier start character.

997 if (c0_ == '\\') {	955 if (c0_ == '\\') {

998 uc32 c = ScanIdentifierUnicodeEscape();	956 uc32 c = ScanIdentifierUnicodeEscape();

999 // Only allow legal identifier start characters.	957 // Only allow legal identifier start characters.

1000 if (!i::ScannerConstants::kIsIdentifierStart.get(c)) {	958 if (!i::ScannerConstants::kIsIdentifierStart.get(c)) {

(...skipping 22 matching lines...) Expand all Loading...
1023 keyword_match.AddChar(c0_);	981 keyword_match.AddChar(c0_);

1024 Advance();	982 Advance();

1025 }	983 }

1026 }	984 }

1027 literal.Complete();	985 literal.Complete();

1028	986

1029 return keyword_match.token();	987 return keyword_match.token();

1030 }	988 }

1031	989

1032	990

1033 template <typename InputStream, typename LiteralsBuffer>	991 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1034 bool Scanner<InputStream, LiteralsBuffer>::ScanRegExpPattern(bool seen_equal) {

1035 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	992 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1036 bool in_character_class = false;	993 bool in_character_class = false;

1037	994

1038 // Previous token is either '/' or '/=', in the second case, the	995 // Previous token is either '/' or '/=', in the second case, the

1039 // pattern starts at =.	996 // pattern starts at =.

1040 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	997 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1041 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	998 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1042	999

1043 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1000 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1044 // the scanner should pass uninterpreted bodies to the RegExp	1001 // the scanner should pass uninterpreted bodies to the RegExp

(...skipping 18 matching lines...) Expand all Loading...
1063 AddLiteralCharAdvance();	1020 AddLiteralCharAdvance();

1064 }	1021 }

1065 }	1022 }

1066 Advance(); // consume '/'	1023 Advance(); // consume '/'

1067	1024

1068 literal.Complete();	1025 literal.Complete();

1069	1026

1070 return true;	1027 return true;

1071 }	1028 }

1072	1029

1073 template <typename InputStream, typename LiteralsBuffer>	1030 bool Scanner::ScanRegExpFlags() {

1074 bool Scanner<InputStream, LiteralsBuffer>::ScanRegExpFlags() {

1075 // Scan regular expression flags.	1031 // Scan regular expression flags.

1076 LiteralScope literal(this, kLiteralRegExpFlags);	1032 LiteralScope literal(this, kLiteralRegExpFlags);

1077 while (i::ScannerConstants::kIsIdentifierPart.get(c0_)) {	1033 while (i::ScannerConstants::kIsIdentifierPart.get(c0_)) {

1078 if (c0_ == '\\') {	1034 if (c0_ == '\\') {

1079 uc32 c = ScanIdentifierUnicodeEscape();	1035 uc32 c = ScanIdentifierUnicodeEscape();

1080 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	1036 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

1081 // We allow any escaped character, unlike the restriction on	1037 // We allow any escaped character, unlike the restriction on

1082 // IdentifierPart when it is used to build an IdentifierName.	1038 // IdentifierPart when it is used to build an IdentifierName.

1083 AddLiteralChar(c);	1039 AddLiteralChar(c);

1084 continue;	1040 continue;

1085 }	1041 }

1086 }	1042 }

1087 AddLiteralCharAdvance();	1043 AddLiteralCharAdvance();

1088 }	1044 }

1089 literal.Complete();	1045 literal.Complete();

1090	1046

1091 next_.location.end_pos = source_pos() - 1;	1047 next_.location.end_pos = source_pos() - 1;

1092 return true;	1048 return true;

1093 }	1049 }

1094	1050

1095	1051

1096 } } // namespace v8::preparser	1052 } } // namespace v8::preparser

1097	1053

1098 #endif // V8_PRESCANNER_H_	1054 #endif // V8_PRESCANNER_H_

OLD	NEW

« no previous file with comments | « src/parser.cc ('k') | src/scanner.h » ('j') | no next file with comments »