src/lexer/lexer.h - Issue 180743019: Experimental parser: more cleanup after rebase

Side by Side Diff: src/lexer/lexer.h

Issue 180743019: Experimental parser: more cleanup after rebase (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 27 matching lines...) Expand all Loading...
38 class LexerBase;	38 class LexerBase;

39	39

40 class LexerGCHandler {	40 class LexerGCHandler {

41 public:	41 public:

42 explicit LexerGCHandler(Isolate* isolate) : isolate_(isolate) {}	42 explicit LexerGCHandler(Isolate* isolate) : isolate_(isolate) {}

43 void AddLexer(LexerBase* lexer);	43 void AddLexer(LexerBase* lexer);

44 void RemoveLexer(LexerBase* lexer);	44 void RemoveLexer(LexerBase* lexer);

45 void UpdateLexersAfterGC();	45 void UpdateLexersAfterGC();

46	46

47 private:	47 private:

	48 typedef std::set<LexerBase*> LexerSet;

48 Isolate* isolate_;	49 Isolate* isolate_;

49 std::set<LexerBase*> lexers_;	50 LexerSet lexers_;

50 };	51 };

51	52

52	53

53 class LexerBase {	54 class LexerBase {

54 public:	55 public:

55 struct Location {	56 struct Location {

56 Location(int b, int e) : beg_pos(b), end_pos(e) { }	57 Location(int b, int e) : beg_pos(b), end_pos(e) { }

57 Location() : beg_pos(0), end_pos(0) { }	58 Location() : beg_pos(0), end_pos(0) { }

58	59

59 bool IsValid() const {	60 bool IsValid() const {

60 return beg_pos >= 0 && end_pos >= beg_pos;	61 return beg_pos >= 0 && end_pos >= beg_pos;

61 }	62 }

62	63

63 static Location invalid() { return Location(-1, -1); }	64 static Location invalid() { return Location(-1, -1); }

64	65

65 int beg_pos;	66 int beg_pos;

66 int end_pos;	67 int end_pos;

67 };	68 };

68	69

69 explicit LexerBase(UnicodeCache* unicode_cache)	70 explicit LexerBase(UnicodeCache* unicode_cache);

70 : unicode_cache_(unicode_cache),

71 has_line_terminator_before_next_(true),

72 has_multiline_comment_before_next_(false),

73 current_literal_(&literals_[0]),

74 next_literal_(&literals_[1]),

75 harmony_numeric_literals_(false),

76 harmony_modules_(false),

77 harmony_scoping_(false) {

78 }

79	71

80 virtual ~LexerBase();	72 virtual ~LexerBase();

81	73

82 // Returns the next token and advances input.	74 // Returns the next token and advances input.

83 Token::Value Next();	75 Token::Value Next();

84	76

85 // Returns the current token again.	77 // Returns the current token again.

86 Token::Value current_token() const { return current_.token; }	78 Token::Value current_token() const { return current_.token; }

87	79

88 // Returns the location information for the current token	80 // Returns the location information for the current token

89 // (the token last returned by Next()).	81 // (the token last returned by Next()).

90 Location location() const {	82 Location location() const {

91 return Location(current_.beg_pos, current_.end_pos);	83 return Location(current_.beg_pos, current_.end_pos);

92 }	84 }

93	85

94 // One token look-ahead (past the token returned by Next()).	86 // One token look-ahead (past the token returned by Next()).

95 Token::Value peek() const { return next_.token; }	87 Token::Value peek() const { return next_.token; }

96	88

97 Location peek_location() const {	89 Location peek_location() const {

98 return Location(next_.beg_pos, next_.end_pos);	90 return Location(next_.beg_pos, next_.end_pos);

99 }	91 }

100	92

101 // Seek forward to the given position. This operation works for simple cases	93 // Seek forward to the given position. This operation works for simple cases

102 // such as seeking forward until simple delimiter tokens, which is what it is	94 // such as seeking forward until simple delimiter tokens, which is what it is

103 // used for. After this call, we will have the token at the given position as	95 // used for. After this call, we will have the token at the given position as

104 // the "next" token. The "current" token will be invalid. FIXME: for utf-8,	96 // the "next" token. The "current" token will be invalid. FIXME: for utf-8,

105 // we need to decide if pos is counted in characters or in bytes.	97 // we need to decide if pos is counted in characters or in bytes.

106 virtual void SeekForward(int pos) = 0;	98 virtual void SeekForward(int pos) = 0;

107	99

108 virtual void SetEnd(int pos) = 0;

109

110 // Scans the input as a regular expression pattern, previous character(s) must	100 // Scans the input as a regular expression pattern, previous character(s) must

111 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for	101 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for

112 // utf-8 newlines.	102 // utf-8 newlines.

113 virtual bool ScanRegExpPattern(bool seen_equal) = 0;	103 virtual bool ScanRegExpPattern(bool seen_equal) = 0;

114	104

115 // Returns true if regexp flags are scanned (always since flags can	105 // Returns true if regexp flags are scanned (always since flags can

116 // be empty).	106 // be empty).

117 virtual bool ScanRegExpFlags() = 0;	107 virtual bool ScanRegExpFlags() = 0;

118	108

119 // // Returns the location of the last seen octal literal.	109 // // Returns the location of the last seen octal literal.

120 virtual Location octal_position() const = 0;	110 virtual Location octal_position() const = 0;

121	111

122 virtual void clear_octal_position() = 0;	112 virtual void clear_octal_position() = 0;

123	113

124 // Returns true if there was a line terminator before the peek'ed token,	114 // Returns true if there was a line terminator before the peek'ed token,

125 // possibly inside a multi-line comment.	115 // possibly inside a multi-line comment.

126 bool HasAnyLineTerminatorBeforeNext() const {	116 bool HasAnyLineTerminatorBeforeNext() const {

127 return has_line_terminator_before_next_ ||	117 return has_line_terminator_before_next_ ||

128 has_multiline_comment_before_next_;	118 has_multiline_comment_before_next_;

129 }	119 }

130	120

131 Handle<String> GetLiteralSymbol() {	121 Vector<const uint8_t> literal_one_byte_string() {

132 EnsureCurrentLiteralIsValid();	122 EnsureCurrentLiteralIsValid();

133 return InternalizeLiteral(current_literal_);	123 return current_literal_->one_byte_string;

134 }	124 }

135	125

136 Handle<String> GetLiteralString(PretenureFlag tenured) {	126 Vector<const uint16_t> literal_two_byte_string() {

137 EnsureCurrentLiteralIsValid();	127 EnsureCurrentLiteralIsValid();

138 return AllocateLiteral(current_literal_, tenured);	128 return current_literal_->two_byte_string;

139 }

140

141 Handle<String> GetNextLiteralString(PretenureFlag tenured) {

142 EnsureNextLiteralIsValid();

143 return AllocateLiteral(next_literal_, tenured);

144 }

145

146 Vector<const char> literal_ascii_string() {

147 EnsureCurrentLiteralIsValid();

148 return current_literal_->ascii_string;

149 }

150

151 Vector<const uc16> literal_utf16_string() {

152 EnsureCurrentLiteralIsValid();

153 return current_literal_->utf16_string;

154 }	129 }

155	130

156 int literal_length() {	131 int literal_length() {

157 EnsureCurrentLiteralIsValid();	132 EnsureCurrentLiteralIsValid();

158 return current_literal_->length;	133 return current_literal_->length;

159 }	134 }

160	135

161 bool is_literal_ascii() {	136 bool is_literal_one_byte() {

162 EnsureCurrentLiteralIsValid();	137 EnsureCurrentLiteralIsValid();

163 return current_literal_->is_ascii;	138 return current_literal_->is_one_byte;

164 }	139 }

165	140

166 bool is_literal_contextual_keyword(Vector<const char> keyword) {	141 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) {

167 if (!is_literal_ascii()) return false;	142 if (!is_literal_one_byte()) return false;

168 Vector<const char> literal = literal_ascii_string();	143 Vector<const uint8_t> literal = literal_one_byte_string();

169 return literal.length() == keyword.length() &&	144 return literal.length() == keyword.length() &&

170 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);	145 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

171 }	146 }

172	147

173 bool literal_contains_escapes() const {	148 bool literal_contains_escapes() const {

174 return current_.has_escapes;	149 return current_.has_escapes;

175 }	150 }

176	151

177 Vector<const char> next_literal_ascii_string() {	152 Vector<const uint8_t> next_literal_one_byte_string() {

178 EnsureNextLiteralIsValid();	153 EnsureNextLiteralIsValid();

179 return next_literal_->ascii_string;	154 return next_literal_->one_byte_string;

180 }	155 }

181	156

182 Vector<const uc16> next_literal_utf16_string() {	157 Vector<const uint16_t> next_literal_two_byte_string() {

183 EnsureNextLiteralIsValid();	158 EnsureNextLiteralIsValid();

184 return next_literal_->utf16_string;	159 return next_literal_->two_byte_string;

185 }	160 }

186	161

187 int next_literal_length() {	162 int next_literal_length() {

188 EnsureNextLiteralIsValid();	163 EnsureNextLiteralIsValid();

189 return next_literal_->length;	164 return next_literal_->length;

190 }	165 }

191	166

192 bool is_next_literal_ascii() {	167 bool is_next_literal_one_byte() {

193 EnsureNextLiteralIsValid();	168 EnsureNextLiteralIsValid();

194 return next_literal_->is_ascii;	169 return next_literal_->is_one_byte;

195 }	170 }

196	171

197 bool is_next_contextual_keyword(Vector<const char> keyword) {	172 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) {

198 if (!is_next_literal_ascii()) return false;	173 if (!is_next_literal_one_byte()) return false;

199 Vector<const char> literal = next_literal_ascii_string();	174 Vector<const uint8_t> literal = next_literal_one_byte_string();

200 return literal.length() == keyword.length() &&	175 return literal.length() == keyword.length() &&

201 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);	176 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

202 }	177 }

203	178

204 bool HarmonyScoping() const {	179 bool HarmonyScoping() const {

205 return harmony_scoping_;	180 return harmony_scoping_;

206 }	181 }

207	182

208 void SetHarmonyScoping(bool scoping) {	183 void SetHarmonyScoping(bool scoping) {

209 harmony_scoping_ = scoping;	184 harmony_scoping_ = scoping;

(...skipping 21 matching lines...) Expand all Loading...
231 struct TokenDesc {	206 struct TokenDesc {

232 Token::Value token;	207 Token::Value token;

233 int beg_pos;	208 int beg_pos;

234 int end_pos;	209 int end_pos;

235 bool has_escapes;	210 bool has_escapes;

236 bool is_onebyte;	211 bool is_onebyte;

237 };	212 };

238	213

239 struct LiteralDesc {	214 struct LiteralDesc {

240 int beg_pos;	215 int beg_pos;

241 bool is_ascii;	216 bool is_one_byte;

242 bool is_in_buffer;	217 bool is_in_buffer;

243 int offset;	218 int offset;

244 int length;	219 int length;

245 Vector<const char> ascii_string;	220 Vector<const uint8_t> one_byte_string;

246 Vector<const uc16> utf16_string;	221 Vector<const uint16_t> two_byte_string;

247 LiteralBuffer buffer;	222 LiteralBuffer buffer;

248 LiteralDesc() : beg_pos(-1), is_ascii(false), is_in_buffer(false),	223 LiteralDesc() : beg_pos(-1), is_one_byte(false), is_in_buffer(false),

249 offset(0), length(0) { }	224 offset(0), length(0) { }

250 bool Valid(int pos) { return beg_pos == pos; }	225 bool Valid(int pos) { return beg_pos == pos; }

251 };	226 };

252	227

253 virtual void Scan() = 0;	228 virtual void Scan() = 0;

254	229

255 virtual void UpdateBufferBasedOnHandle() = 0;	230 virtual void UpdateBufferBasedOnHandle() = 0;

256 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;	231 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;

257 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;	232 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;

258 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,	233 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,

(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
296 friend class Scanner;	271 friend class Scanner;

297 friend class LexerGCHandler;	272 friend class LexerGCHandler;

298 };	273 };

299	274

300	275

301 template<typename Char>	276 template<typename Char>

302 class Lexer : public LexerBase {	277 class Lexer : public LexerBase {

303 public:	278 public:

304 Lexer(UnicodeCache* unicode_cache,	279 Lexer(UnicodeCache* unicode_cache,

305 Handle<String> source,	280 Handle<String> source,

306 int start_position_,	281 int start_position,

307 int end_position_);	282 int end_position);

308 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length);	283 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length);

309 virtual ~Lexer();	284 virtual ~Lexer();

310	285

311 virtual void SeekForward(int pos);	286 virtual void SeekForward(int pos);

312 virtual void SetEnd(int pos);

313 virtual bool ScanRegExpPattern(bool seen_equal);	287 virtual bool ScanRegExpPattern(bool seen_equal);

314 virtual bool ScanRegExpFlags();	288 virtual bool ScanRegExpFlags();

315 virtual Location octal_position() const;	289 virtual Location octal_position() const;

316 virtual void clear_octal_position() { last_octal_end_ = NULL; }	290 virtual void clear_octal_position() { last_octal_end_ = NULL; }

317	291

318 protected:	292 protected:

319 virtual void Scan();	293 virtual void Scan();

320	294

321 const Char* GetNewBufferBasedOnHandle() const;	295 const Char* GetNewBufferBasedOnHandle() const;

322 virtual void UpdateBufferBasedOnHandle();	296 virtual void UpdateBufferBasedOnHandle();

(...skipping 23 matching lines...) Expand all Loading...
346	320

347 // Returns true if the literal of the token can be represented as a	321 // Returns true if the literal of the token can be represented as a

348 // substring of the source.	322 // substring of the source.

349 bool IsSubstringOfSource(const TokenDesc& token);	323 bool IsSubstringOfSource(const TokenDesc& token);

350	324

351 bool CopyToLiteralBuffer(const Char* start,	325 bool CopyToLiteralBuffer(const Char* start,

352 const Char* end,	326 const Char* end,

353 const TokenDesc& token,	327 const TokenDesc& token,

354 LiteralDesc* literal);	328 LiteralDesc* literal);

355	329

	330 // One of source_handle_ or source_ptr_ is set.

	331 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed.

356 Isolate* isolate_;	332 Isolate* isolate_;

357 const Handle<String> source_handle_;	333 const Handle<String> source_handle_;

358 const Char* const source_ptr_;	334 const Char* const source_ptr_;

359 const int start_position_;	335 const int start_position_;

360 const int end_position_;	336 const int end_position_;

	337 // Stream variables.

361 const Char* buffer_;	338 const Char* buffer_;

362 const Char* buffer_end_;	339 const Char* buffer_end_;

363 const Char* start_;	340 const Char* start_;

364 const Char* cursor_;	341 const Char* cursor_;

365

366 // Where we have seen the last octal number or an octal escape inside a	342 // Where we have seen the last octal number or an octal escape inside a

367 // string. Used by octal_position().	343 // string. Used by octal_position().

368 const Char* last_octal_end_;	344 const Char* last_octal_end_;

369 };	345 };

370	346

371	347

372 #ifdef V8_USE_GENERATED_LEXER	348 #ifdef V8_USE_GENERATED_LEXER

373	349

374	350

375 // Match old scanner interface.	351 // Match old scanner interface.

376 class Scanner {	352 class Scanner {

377 public:	353 public:

378 typedef LexerBase::Location Location;	354 typedef LexerBase::Location Location;

379	355

380 explicit Scanner(UnicodeCache* unicode_cache);	356 explicit Scanner(UnicodeCache* unicode_cache);

381	357

382 ~Scanner() { delete lexer_; }	358 ~Scanner() { delete lexer_; }

383	359

384 void Initialize(Utf16CharacterStream* source);	360 void Initialize(Utf16CharacterStream* source);

385	361

386 inline void SeekForward(int pos) { lexer_->SeekForward(pos); }	362 inline void SeekForward(int pos) { lexer_->SeekForward(pos); }

387	363

388 inline void SetEnd(int pos) { lexer_->SetEnd(pos); }

389

390 inline bool ScanRegExpPattern(bool seen_equal) {	364 inline bool ScanRegExpPattern(bool seen_equal) {

391 return lexer_->ScanRegExpPattern(seen_equal);	365 return lexer_->ScanRegExpPattern(seen_equal);

392 }	366 }

393	367

394 inline bool ScanRegExpFlags() { return lexer_->ScanRegExpFlags(); }	368 inline bool ScanRegExpFlags() { return lexer_->ScanRegExpFlags(); }

395	369

396 inline Location octal_position() const { return lexer_->octal_position(); }	370 inline Location octal_position() const { return lexer_->octal_position(); }

397	371

398 inline void clear_octal_position() { lexer_->clear_octal_position(); }	372 inline void clear_octal_position() { lexer_->clear_octal_position(); }

399	373

(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
433	407

434 inline void SetHarmonyNumericLiterals(bool numeric_literals) {	408 inline void SetHarmonyNumericLiterals(bool numeric_literals) {

435 harmony_numeric_literals_ = numeric_literals;	409 harmony_numeric_literals_ = numeric_literals;

436 SyncSettings();	410 SyncSettings();

437 }	411 }

438	412

439 inline bool HasAnyLineTerminatorBeforeNext() const {	413 inline bool HasAnyLineTerminatorBeforeNext() const {

440 return lexer_->HasAnyLineTerminatorBeforeNext();	414 return lexer_->HasAnyLineTerminatorBeforeNext();

441 }	415 }

442	416

443 inline Handle<String> GetLiteralSymbol() {

444 return lexer_->GetLiteralSymbol();

445 }

446

447 inline Handle<String> GetLiteralString(PretenureFlag tenured) {

448 return lexer_->GetLiteralString(tenured);

449 }

450

451 inline Handle<String> GetNextLiteralString(PretenureFlag tenured) {

452 return lexer_->GetNextLiteralString(tenured);

453 }

454

455 inline Vector<const char> literal_ascii_string() {	417 inline Vector<const char> literal_ascii_string() {

456 return lexer_->literal_ascii_string();	418 return Vector<const char>::cast(lexer_->literal_one_byte_string());

457 }	419 }

458	420

459 inline Vector<const uc16> literal_utf16_string() {	421 inline Vector<const uc16> literal_utf16_string() {

460 return lexer_->literal_utf16_string();	422 return lexer_->literal_two_byte_string();

461 }	423 }

462	424

463 inline int literal_length() {	425 inline int literal_length() {

464 return lexer_->literal_length();	426 return lexer_->literal_length();

465 }	427 }

466	428

467 inline bool is_literal_ascii() {	429 inline bool is_literal_ascii() {

468 return lexer_->is_literal_ascii();	430 return lexer_->is_literal_one_byte();

469 }	431 }

470	432

471 inline bool is_literal_contextual_keyword(Vector<const char> keyword) {	433 inline bool is_literal_contextual_keyword(

472 return lexer_->is_literal_contextual_keyword(keyword);	434 Vector<const char>& keyword) { // NOLINT

	435 return lexer_->is_literal_contextual_keyword(

	436 Vector<const uint8_t>::cast(keyword));

473 }	437 }

474	438

475 inline bool literal_contains_escapes() const {	439 inline bool literal_contains_escapes() const {

476 return lexer_->literal_contains_escapes();	440 return lexer_->literal_contains_escapes();

477 }	441 }

478	442

479 inline Vector<const char> next_literal_ascii_string() {	443 inline Vector<const char> next_literal_ascii_string() {

480 return lexer_->next_literal_ascii_string();	444 return Vector<const char>::cast(lexer_->next_literal_one_byte_string());

481 }	445 }

482	446

483 inline Vector<const uc16> next_literal_utf16_string() {	447 inline Vector<const uc16> next_literal_utf16_string() {

484 return lexer_->next_literal_utf16_string();	448 return lexer_->next_literal_two_byte_string();

485 }	449 }

486	450

487 inline int next_literal_length() {	451 inline int next_literal_length() {

488 return lexer_->next_literal_length();	452 return lexer_->next_literal_length();

489 }	453 }

490	454

491 inline bool is_next_literal_ascii() {	455 inline bool is_next_literal_ascii() {

492 return lexer_->is_next_literal_ascii();	456 return lexer_->is_next_literal_one_byte();

493 }	457 }

494	458

495 inline bool is_next_contextual_keyword(Vector<const char> keyword) {	459 inline bool is_next_contextual_keyword(

496 return lexer_->is_next_contextual_keyword(keyword);	460 Vector<const char>& keyword) { // NOLINT

	461 return lexer_->is_next_contextual_keyword(

	462 Vector<const uint8_t>::cast(keyword));

497 }	463 }

498	464

499 private:	465 private:

500 void SyncSettings();	466 void SyncSettings();

501	467

502 UnicodeCache* unicode_cache_;	468 UnicodeCache* unicode_cache_;

503 LexerBase* lexer_;	469 LexerBase* lexer_;

504 bool harmony_numeric_literals_;	470 bool harmony_numeric_literals_;

505 bool harmony_modules_;	471 bool harmony_modules_;

506 bool harmony_scoping_;	472 bool harmony_scoping_;

507 };	473 };

508	474

509	475

510 #endif	476 #endif

511	477

512	478

513 } }	479 } }

514	480

515 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H	481 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H

OLD	NEW

« no previous file with comments | « no previous file | src/lexer/lexer.cc » ('j') | no next file with comments »