src/scanner.h - Issue 661367: Refactor the scanner interface...

Side by Side Diff: src/scanner.h

Issue 661367: Refactor the scanner interface... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 }	77 }

78	78

79 static char* ComputeLimit(char* data, int capacity) {	79 static char* ComputeLimit(char* data, int capacity) {

80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;	80 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;

81 }	81 }

82	82

83 void AddCharSlow(uc32 c);	83 void AddCharSlow(uc32 c);

84 };	84 };

85	85

86	86

	87 // Interface through which the scanner reads characters from the input source.

87 class UTF16Buffer {	88 class UTF16Buffer {

88 public:	89 public:

89 UTF16Buffer();	90 UTF16Buffer();

90 virtual ~UTF16Buffer() {}	91 virtual ~UTF16Buffer() {}

91	92

92 virtual void PushBack(uc32 ch) = 0;	93 virtual void PushBack(uc32 ch) = 0;

93 // returns a value < 0 when the buffer end is reached	94 // Returns a value < 0 when the buffer end is reached.

94 virtual uc32 Advance() = 0;	95 virtual uc32 Advance() = 0;

95 virtual void SeekForward(int pos) = 0;	96 virtual void SeekForward(int pos) = 0;

96	97

97 int pos() const { return pos_; }	98 int pos() const { return pos_; }

98 int size() const { return size_; }

99 Handle<String> SubString(int start, int end);

100	99

101 protected:	100 protected:

102 Handle<String> data_;	101 int pos_; // Current position in the buffer.

103 int pos_;	102 int end_; // Position where scanning should stop (EOF).

104 int size_;

105 };	103 };

106	104

107	105

	106 // UTF16 buffer to read characters from a character stream.

108 class CharacterStreamUTF16Buffer: public UTF16Buffer {	107 class CharacterStreamUTF16Buffer: public UTF16Buffer {

109 public:	108 public:

110 CharacterStreamUTF16Buffer();	109 CharacterStreamUTF16Buffer();

111 virtual ~CharacterStreamUTF16Buffer() {}	110 virtual ~CharacterStreamUTF16Buffer() {}

112 void Initialize(Handle<String> data, unibrow::CharacterStream* stream);	111 void Initialize(Handle<String> data,

	112 unibrow::CharacterStream* stream,

	113 int start_position,

	114 int end_position);

113 virtual void PushBack(uc32 ch);	115 virtual void PushBack(uc32 ch);

114 virtual uc32 Advance();	116 virtual uc32 Advance();

115 virtual void SeekForward(int pos);	117 virtual void SeekForward(int pos);

116	118

117 private:	119 private:

118 List<uc32> pushback_buffer_;	120 List<uc32> pushback_buffer_;

119 uc32 last_;	121 uc32 last_;

120 unibrow::CharacterStream* stream_;	122 unibrow::CharacterStream* stream_;

121	123

122 List<uc32>* pushback_buffer() { return &pushback_buffer_; }	124 List<uc32>* pushback_buffer() { return &pushback_buffer_; }

123 };	125 };

124	126

125	127

126 class TwoByteStringUTF16Buffer: public UTF16Buffer {	128 // UTF16 buffer to read characters from an external string.

	129 template <typename StringType, typename CharType>

	130 class ExternalStringUTF16Buffer: public UTF16Buffer {

127 public:	131 public:

128 TwoByteStringUTF16Buffer();	132 ExternalStringUTF16Buffer();

129 virtual ~TwoByteStringUTF16Buffer() {}	133 virtual ~ExternalStringUTF16Buffer() {}

130 void Initialize(Handle<ExternalTwoByteString> data);	134 void Initialize(Handle<StringType> data,

	135 int start_position,

	136 int end_position);

131 virtual void PushBack(uc32 ch);	137 virtual void PushBack(uc32 ch);

132 virtual uc32 Advance();	138 virtual uc32 Advance();

133 virtual void SeekForward(int pos);	139 virtual void SeekForward(int pos);

134	140

135 private:	141 private:

136 const uint16_t* raw_data_;	142 const CharType* raw_data_; // Pointer to the actual array of characters.

137 };	143 };

138	144

139	145

140 class KeywordMatcher {	146 class KeywordMatcher {

141 // Incrementally recognize keywords.	147 // Incrementally recognize keywords.

142 //	148 //

143 // Recognized keywords:	149 // Recognized keywords:

144 // break case catch const* continue debugger* default delete do else	150 // break case catch const* continue debugger* default delete do else

145 // finally false for function if in instanceof native* new null	151 // finally false for function if in instanceof native* new null

146 // return switch this throw true try typeof var void while with	152 // return switch this throw true try typeof var void while with

(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
256 enum ParserLanguage { JAVASCRIPT, JSON };	262 enum ParserLanguage { JAVASCRIPT, JSON };

257	263

258	264

259 class Scanner {	265 class Scanner {

260 public:	266 public:

261 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;	267 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

262	268

263 // Construction	269 // Construction

264 explicit Scanner(ParserMode parse_mode);	270 explicit Scanner(ParserMode parse_mode);

265	271

266 // Initialize the Scanner to scan source:	272 // Initialize the Scanner to scan source.

267 void Init(Handle<String> source,	273 void Initialize(Handle<String> source,

268 unibrow::CharacterStream* stream,	274 ParserLanguage language);

269 int position,	275 void Initialize(Handle<String> source,

270 ParserLanguage language);	276 unibrow::CharacterStream* stream,

	277 ParserLanguage language);

	278 void Initialize(Handle<String> source,

	279 int start_position, int end_position,

	280 ParserLanguage language);

271	281

272 // Returns the next token.	282 // Returns the next token.

273 Token::Value Next();	283 Token::Value Next();

274	284

275 // One token look-ahead (past the token returned by Next()).	285 // One token look-ahead (past the token returned by Next()).

276 Token::Value peek() const { return next_.token; }	286 Token::Value peek() const { return next_.token; }

277	287

278 // Returns true if there was a line terminator before the peek'ed token.	288 // Returns true if there was a line terminator before the peek'ed token.

279 bool has_line_terminator_before_next() const {	289 bool has_line_terminator_before_next() const {

280 return has_line_terminator_before_next_;	290 return has_line_terminator_before_next_;

(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 // Returns true if regexp flags are scanned (always since flags can	338 // Returns true if regexp flags are scanned (always since flags can

329 // be empty).	339 // be empty).

330 bool ScanRegExpFlags();	340 bool ScanRegExpFlags();

331	341

332 // Seek forward to the given position. This operation does not	342 // Seek forward to the given position. This operation does not

333 // work in general, for instance when there are pushed back	343 // work in general, for instance when there are pushed back

334 // characters, but works for seeking forward until simple delimiter	344 // characters, but works for seeking forward until simple delimiter

335 // tokens, which is what it is used for.	345 // tokens, which is what it is used for.

336 void SeekForward(int pos);	346 void SeekForward(int pos);

337	347

338 Handle<String> SubString(int start_pos, int end_pos);

339 bool stack_overflow() { return stack_overflow_; }	348 bool stack_overflow() { return stack_overflow_; }

340	349

341 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }	350 static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }

342	351

343 // Tells whether the buffer contains an identifier (no escapes).	352 // Tells whether the buffer contains an identifier (no escapes).

344 // Used for checking if a property name is an identifier.	353 // Used for checking if a property name is an identifier.

345 static bool IsIdentifier(unibrow::CharacterStream* buffer);	354 static bool IsIdentifier(unibrow::CharacterStream* buffer);

346	355

347 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;	356 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;

348 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;	357 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;

349 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;	358 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;

350 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;	359 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

351	360

352 static const int kCharacterLookaheadBufferSize = 1;	361 static const int kCharacterLookaheadBufferSize = 1;

	362 static const int kNoEndPosition = 1;

353	363

354 private:	364 private:

	365 void Init(Handle<String> source,

	366 unibrow::CharacterStream* stream,

	367 int start_position, int end_position,

	368 ParserLanguage language);

	369

	370

	371 // Different UTF16 buffers used to pull characters from. Based on input one of

	372 // these will be initialized as the actual data source.

355 CharacterStreamUTF16Buffer char_stream_buffer_;	373 CharacterStreamUTF16Buffer char_stream_buffer_;

356 TwoByteStringUTF16Buffer two_byte_string_buffer_;	374 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>

	375 two_byte_string_buffer_;

	376 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;

357	377

358 // Source.	378 // Source. Will point to one of the buffers declared above.

359 UTF16Buffer* source_;	379 UTF16Buffer* source_;

360 int position_;	380

	381 // Used to convert the source string into a character stream when a stream

	382 // is not passed to the scanner.

	383 SafeStringInputBuffer safe_string_input_buffer_;

361	384

362 // Buffer to hold literal values (identifiers, strings, numbers)	385 // Buffer to hold literal values (identifiers, strings, numbers)

363 // using 0-terminated UTF-8 encoding.	386 // using 0-terminated UTF-8 encoding.

364 UTF8Buffer literal_buffer_1_;	387 UTF8Buffer literal_buffer_1_;

365 UTF8Buffer literal_buffer_2_;	388 UTF8Buffer literal_buffer_2_;

366	389

367 bool stack_overflow_;	390 bool stack_overflow_;

368 static StaticResource<Utf8Decoder> utf8_decoder_;	391 static StaticResource<Utf8Decoder> utf8_decoder_;

369	392

370 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	393 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 uc32 ScanHexEscape(uc32 c, int length);	476 uc32 ScanHexEscape(uc32 c, int length);

454 uc32 ScanOctalEscape(uc32 c, int length);	477 uc32 ScanOctalEscape(uc32 c, int length);

455 void ScanEscape();	478 void ScanEscape();

456 Token::Value ScanString();	479 Token::Value ScanString();

457	480

458 // Scans a possible HTML comment -- begins with '<!'.	481 // Scans a possible HTML comment -- begins with '<!'.

459 Token::Value ScanHtmlComment();	482 Token::Value ScanHtmlComment();

460	483

461 // Return the current source position.	484 // Return the current source position.

462 int source_pos() {	485 int source_pos() {

463 return source_->pos() - kCharacterLookaheadBufferSize + position_;	486 return source_->pos() - kCharacterLookaheadBufferSize;

464 }	487 }

465	488

466 // Decodes a unicode escape-sequence which is part of an identifier.	489 // Decodes a unicode escape-sequence which is part of an identifier.

467 // If the escape sequence cannot be decoded the result is kBadRune.	490 // If the escape sequence cannot be decoded the result is kBadRune.

468 uc32 ScanIdentifierUnicodeEscape();	491 uc32 ScanIdentifierUnicodeEscape();

469 };	492 };

470	493

471 } } // namespace v8::internal	494 } } // namespace v8::internal

472	495

473 #endif // V8_SCANNER_H_	496 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »