src/scanner-base.h - Issue 6529032: Merge 6168:6800 from bleeding_edge to experimental/gc branch.

Side by Side Diff: src/scanner-base.h

Issue 6529032: Merge 6168:6800 from bleeding_edge to experimental/gc branch. (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/gc/

Patch Set: Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
57 // Buffered stream of characters, using an internal UC16 buffer.	57 // Buffered stream of characters, using an internal UC16 buffer.

58	58

59 class UC16CharacterStream {	59 class UC16CharacterStream {

60 public:	60 public:

61 UC16CharacterStream() : pos_(0) { }	61 UC16CharacterStream() : pos_(0) { }

62 virtual ~UC16CharacterStream() { }	62 virtual ~UC16CharacterStream() { }

63	63

64 // Returns and advances past the next UC16 character in the input	64 // Returns and advances past the next UC16 character in the input

65 // stream. If there are no more characters, it returns a negative	65 // stream. If there are no more characters, it returns a negative

66 // value.	66 // value.

67 inline int32_t Advance() {	67 inline uc32 Advance() {

68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {	68 if (buffer_cursor_ < buffer_end_ || ReadBlock()) {

69 pos_++;	69 pos_++;

70 return *(buffer_cursor_++);	70 return static_cast<uc32>(*(buffer_cursor_++));

71 }	71 }

72 // Note: currently the following increment is necessary to avoid a	72 // Note: currently the following increment is necessary to avoid a

73 // parser problem! The scanner treats the final kEndOfInput as	73 // parser problem! The scanner treats the final kEndOfInput as

74 // a character with a position, and does math relative to that	74 // a character with a position, and does math relative to that

75 // position.	75 // position.

76 pos_++;	76 pos_++;

77	77

78 return kEndOfInput;	78 return kEndOfInput;

79 }	79 }

80	80

81 // Return the current position in the character stream.	81 // Return the current position in the character stream.

82 // Starts at zero.	82 // Starts at zero.

83 inline unsigned pos() const { return pos_; }	83 inline unsigned pos() const { return pos_; }

84	84

85 // Skips forward past the next character_count UC16 characters	85 // Skips forward past the next character_count UC16 characters

86 // in the input, or until the end of input if that comes sooner.	86 // in the input, or until the end of input if that comes sooner.

87 // Returns the number of characters actually skipped. If less	87 // Returns the number of characters actually skipped. If less

88 // than character_count, the end of the input was reached.	88 // than character_count, the end of the input was reached.

89 inline unsigned SeekForward(unsigned character_count) {	89 inline unsigned SeekForward(unsigned character_count) {

90 unsigned buffered_chars =	90 unsigned buffered_chars =

91 static_cast<unsigned>(buffer_end_ - buffer_cursor_);	91 static_cast<unsigned>(buffer_end_ - buffer_cursor_);

92 if (character_count <= buffered_chars) {	92 if (character_count <= buffered_chars) {

93 buffer_cursor_ += character_count;	93 buffer_cursor_ += character_count;

94 pos_ += character_count;	94 pos_ += character_count;

95 return character_count;	95 return character_count;

96 }	96 }

97 return SlowSeekForward(character_count);	97 return SlowSeekForward(character_count);

98 }	98 }

99	99

100 // Pushes back the most recently read UC16 character, i.e.,	100 // Pushes back the most recently read UC16 character (or negative

101 // the value returned by the most recent call to Advance.	101 // value if at end of input), i.e., the value returned by the most recent

	102 // call to Advance.

102 // Must not be used right after calling SeekForward.	103 // Must not be used right after calling SeekForward.

103 virtual void PushBack(uc16 character) = 0;	104 virtual void PushBack(int32_t character) = 0;

104	105

105 protected:	106 protected:

106 static const int32_t kEndOfInput = -1;	107 static const uc32 kEndOfInput = -1;

107	108

108 // Ensures that the buffer_cursor_ points to the character at	109 // Ensures that the buffer_cursor_ points to the character at

109 // position pos_ of the input, if possible. If the position	110 // position pos_ of the input, if possible. If the position

110 // is at or after the end of the input, return false. If there	111 // is at or after the end of the input, return false. If there

111 // are more characters available, return true.	112 // are more characters available, return true.

112 virtual bool ReadBlock() = 0;	113 virtual bool ReadBlock() = 0;

113 virtual unsigned SlowSeekForward(unsigned character_count) = 0;	114 virtual unsigned SlowSeekForward(unsigned character_count) = 0;

114	115

115 const uc16* buffer_cursor_;	116 const uc16* buffer_cursor_;

116 const uc16* buffer_end_;	117 const uc16* buffer_end_;

(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
239 Vector<byte> backing_store_;	240 Vector<byte> backing_store_;

240 };	241 };

241	242

242	243

243 // ----------------------------------------------------------------------------	244 // ----------------------------------------------------------------------------

244 // Scanner base-class.	245 // Scanner base-class.

245	246

246 // Generic functionality used by both JSON and JavaScript scanners.	247 // Generic functionality used by both JSON and JavaScript scanners.

247 class Scanner {	248 class Scanner {

248 public:	249 public:

	250 // -1 is outside of the range of any real source code.

	251 static const int kNoOctalLocation = -1;

	252

249 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;	253 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

250	254

251 class LiteralScope {	255 class LiteralScope {

252 public:	256 public:

253 explicit LiteralScope(Scanner* self);	257 explicit LiteralScope(Scanner* self);

254 ~LiteralScope();	258 ~LiteralScope();

255 void Complete();	259 void Complete();

256	260

257 private:	261 private:

258 Scanner* scanner_;	262 Scanner* scanner_;

259 bool complete_;	263 bool complete_;

260 };	264 };

261	265

262 Scanner();	266 Scanner();

263	267

264 // Returns the current token again.	268 // Returns the current token again.

265 Token::Value current_token() { return current_.token; }	269 Token::Value current_token() { return current_.token; }

266	270

267 // One token look-ahead (past the token returned by Next()).	271 // One token look-ahead (past the token returned by Next()).

268 Token::Value peek() const { return next_.token; }	272 Token::Value peek() const { return next_.token; }

269	273

270 struct Location {	274 struct Location {

271 Location(int b, int e) : beg_pos(b), end_pos(e) { }	275 Location(int b, int e) : beg_pos(b), end_pos(e) { }

272 Location() : beg_pos(0), end_pos(0) { }	276 Location() : beg_pos(0), end_pos(0) { }

	277

	278 bool IsValid() const {

	279 return beg_pos >= 0 && end_pos >= beg_pos;

	280 }

	281

273 int beg_pos;	282 int beg_pos;

274 int end_pos;	283 int end_pos;

275 };	284 };

276	285

	286 static Location NoLocation() {

	287 return Location(-1, -1);

	288 }

	289

277 // Returns the location information for the current token	290 // Returns the location information for the current token

278 // (the token returned by Next()).	291 // (the token returned by Next()).

279 Location location() const { return current_.location; }	292 Location location() const { return current_.location; }

280 Location peek_location() const { return next_.location; }	293 Location peek_location() const { return next_.location; }

281	294

	295 // Returns the location of the last seen octal literal

	296 int octal_position() const { return octal_pos_; }

	297 void clear_octal_position() { octal_pos_ = -1; }

	298

282 // Returns the literal string, if any, for the current token (the	299 // Returns the literal string, if any, for the current token (the

283 // token returned by Next()). The string is 0-terminated and in	300 // token returned by Next()). The string is 0-terminated and in

284 // UTF-8 format; they may contain 0-characters. Literal strings are	301 // UTF-8 format; they may contain 0-characters. Literal strings are

285 // collected for identifiers, strings, and numbers.	302 // collected for identifiers, strings, and numbers.

286 // These functions only give the correct result if the literal	303 // These functions only give the correct result if the literal

287 // was scanned between calls to StartLiteral() and TerminateLiteral().	304 // was scanned between calls to StartLiteral() and TerminateLiteral().

288 bool is_literal_ascii() {	305 bool is_literal_ascii() {

289 ASSERT_NOT_NULL(current_.literal_chars);	306 ASSERT_NOT_NULL(current_.literal_chars);

290 return current_.literal_chars->is_ascii();	307 return current_.literal_chars->is_ascii();

291 }	308 }

(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
385 Advance();	402 Advance();

386 if (c0_ == next) {	403 if (c0_ == next) {

387 Advance();	404 Advance();

388 return then;	405 return then;

389 } else {	406 } else {

390 return else_;	407 return else_;

391 }	408 }

392 }	409 }

393	410

394 uc32 ScanHexEscape(uc32 c, int length);	411 uc32 ScanHexEscape(uc32 c, int length);

	412

	413 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

395 uc32 ScanOctalEscape(uc32 c, int length);	414 uc32 ScanOctalEscape(uc32 c, int length);

396	415

397 // Return the current source position.	416 // Return the current source position.

398 int source_pos() {	417 int source_pos() {

399 return source_->pos() - kCharacterLookaheadBufferSize;	418 return source_->pos() - kCharacterLookaheadBufferSize;

400 }	419 }

401	420

402 // Buffers collecting literal strings, numbers, etc.	421 // Buffers collecting literal strings, numbers, etc.

403 LiteralBuffer literal_buffer1_;	422 LiteralBuffer literal_buffer1_;

404 LiteralBuffer literal_buffer2_;	423 LiteralBuffer literal_buffer2_;

405	424

406 TokenDesc current_; // desc for current token (as returned by Next())	425 TokenDesc current_; // desc for current token (as returned by Next())

407 TokenDesc next_; // desc for next token (one token look-ahead)	426 TokenDesc next_; // desc for next token (one token look-ahead)

408	427

409 // Input stream. Must be initialized to an UC16CharacterStream.	428 // Input stream. Must be initialized to an UC16CharacterStream.

410 UC16CharacterStream* source_;	429 UC16CharacterStream* source_;

411	430

	431 // Start position of the octal literal last scanned.

	432 int octal_pos_;

412	433

413 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	434 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

414 uc32 c0_;	435 uc32 c0_;

415 };	436 };

416	437

417 // ----------------------------------------------------------------------------	438 // ----------------------------------------------------------------------------

418 // JavaScriptScanner - base logic for JavaScript scanning.	439 // JavaScriptScanner - base logic for JavaScript scanning.

419	440

420 class JavaScriptScanner : public Scanner {	441 class JavaScriptScanner : public Scanner {

421 public:	442 public:

(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
536 UNMATCHABLE,	557 UNMATCHABLE,

537 INITIAL,	558 INITIAL,

538 KEYWORD_PREFIX,	559 KEYWORD_PREFIX,

539 KEYWORD_MATCHED,	560 KEYWORD_MATCHED,

540 C,	561 C,

541 CA,	562 CA,

542 CO,	563 CO,

543 CON,	564 CON,

544 D,	565 D,

545 DE,	566 DE,

	567 E,

	568 EX,

546 F,	569 F,

547 I,	570 I,

	571 IM,

	572 IMP,

548 IN,	573 IN,

549 N,	574 N,

	575 P,

	576 PR,

	577 S,

550 T,	578 T,

551 TH,	579 TH,

552 TR,	580 TR,

553 V,	581 V,

554 W	582 W

555 };	583 };

556	584

557 struct FirstState {	585 struct FirstState {

558 const char* keyword;	586 const char* keyword;

559 State state;	587 State state;

560 Token::Value token;	588 Token::Value token;

561 };	589 };

562	590

563 // Range of possible first characters of a keyword.	591 // Range of possible first characters of a keyword.

564 static const unsigned int kFirstCharRangeMin = 'b';	592 static const unsigned int kFirstCharRangeMin = 'b';

565 static const unsigned int kFirstCharRangeMax = 'w';	593 static const unsigned int kFirstCharRangeMax = 'y';

566 static const unsigned int kFirstCharRangeLength =	594 static const unsigned int kFirstCharRangeLength =

567 kFirstCharRangeMax - kFirstCharRangeMin + 1;	595 kFirstCharRangeMax - kFirstCharRangeMin + 1;

568 // State map for first keyword character range.	596 // State map for first keyword character range.

569 static FirstState first_states_[kFirstCharRangeLength];	597 static FirstState first_states_[kFirstCharRangeLength];

570	598

571 // If input equals keyword's character at position, continue matching keyword	599 // If input equals keyword's character at position, continue matching keyword

572 // from that position.	600 // from that position.

573 inline bool MatchKeywordStart(unibrow::uchar input,	601 inline bool MatchKeywordStart(unibrow::uchar input,

574 const char* keyword,	602 const char* keyword,

575 int position,	603 int position,

(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
616 // keyword with the current prefix).	644 // keyword with the current prefix).

617 const char* keyword_;	645 const char* keyword_;

618 int counter_;	646 int counter_;

619 Token::Value keyword_token_;	647 Token::Value keyword_token_;

620 };	648 };

621	649

622	650

623 } } // namespace v8::internal	651 } } // namespace v8::internal

624	652

625 #endif // V8_SCANNER_BASE_H_	653 #endif // V8_SCANNER_BASE_H_

OLD	NEW

« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »