Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/lexer/lexer.h

Issue 180743019: Experimental parser: more cleanup after rebase (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/lexer/lexer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 27 matching lines...) Expand all
38 class LexerBase; 38 class LexerBase;
39 39
40 class LexerGCHandler { 40 class LexerGCHandler {
41 public: 41 public:
42 explicit LexerGCHandler(Isolate* isolate) : isolate_(isolate) {} 42 explicit LexerGCHandler(Isolate* isolate) : isolate_(isolate) {}
43 void AddLexer(LexerBase* lexer); 43 void AddLexer(LexerBase* lexer);
44 void RemoveLexer(LexerBase* lexer); 44 void RemoveLexer(LexerBase* lexer);
45 void UpdateLexersAfterGC(); 45 void UpdateLexersAfterGC();
46 46
47 private: 47 private:
48 typedef std::set<LexerBase*> LexerSet;
48 Isolate* isolate_; 49 Isolate* isolate_;
49 std::set<LexerBase*> lexers_; 50 LexerSet lexers_;
50 }; 51 };
51 52
52 53
53 class LexerBase { 54 class LexerBase {
54 public: 55 public:
55 struct Location { 56 struct Location {
56 Location(int b, int e) : beg_pos(b), end_pos(e) { } 57 Location(int b, int e) : beg_pos(b), end_pos(e) { }
57 Location() : beg_pos(0), end_pos(0) { } 58 Location() : beg_pos(0), end_pos(0) { }
58 59
59 bool IsValid() const { 60 bool IsValid() const {
60 return beg_pos >= 0 && end_pos >= beg_pos; 61 return beg_pos >= 0 && end_pos >= beg_pos;
61 } 62 }
62 63
63 static Location invalid() { return Location(-1, -1); } 64 static Location invalid() { return Location(-1, -1); }
64 65
65 int beg_pos; 66 int beg_pos;
66 int end_pos; 67 int end_pos;
67 }; 68 };
68 69
69 explicit LexerBase(UnicodeCache* unicode_cache) 70 explicit LexerBase(UnicodeCache* unicode_cache);
70 : unicode_cache_(unicode_cache),
71 has_line_terminator_before_next_(true),
72 has_multiline_comment_before_next_(false),
73 current_literal_(&literals_[0]),
74 next_literal_(&literals_[1]),
75 harmony_numeric_literals_(false),
76 harmony_modules_(false),
77 harmony_scoping_(false) {
78 }
79 71
80 virtual ~LexerBase(); 72 virtual ~LexerBase();
81 73
82 // Returns the next token and advances input. 74 // Returns the next token and advances input.
83 Token::Value Next(); 75 Token::Value Next();
84 76
85 // Returns the current token again. 77 // Returns the current token again.
86 Token::Value current_token() const { return current_.token; } 78 Token::Value current_token() const { return current_.token; }
87 79
88 // Returns the location information for the current token 80 // Returns the location information for the current token
89 // (the token last returned by Next()). 81 // (the token last returned by Next()).
90 Location location() const { 82 Location location() const {
91 return Location(current_.beg_pos, current_.end_pos); 83 return Location(current_.beg_pos, current_.end_pos);
92 } 84 }
93 85
94 // One token look-ahead (past the token returned by Next()). 86 // One token look-ahead (past the token returned by Next()).
95 Token::Value peek() const { return next_.token; } 87 Token::Value peek() const { return next_.token; }
96 88
97 Location peek_location() const { 89 Location peek_location() const {
98 return Location(next_.beg_pos, next_.end_pos); 90 return Location(next_.beg_pos, next_.end_pos);
99 } 91 }
100 92
101 // Seek forward to the given position. This operation works for simple cases 93 // Seek forward to the given position. This operation works for simple cases
102 // such as seeking forward until simple delimiter tokens, which is what it is 94 // such as seeking forward until simple delimiter tokens, which is what it is
103 // used for. After this call, we will have the token at the given position as 95 // used for. After this call, we will have the token at the given position as
104 // the "next" token. The "current" token will be invalid. FIXME: for utf-8, 96 // the "next" token. The "current" token will be invalid. FIXME: for utf-8,
105 // we need to decide if pos is counted in characters or in bytes. 97 // we need to decide if pos is counted in characters or in bytes.
106 virtual void SeekForward(int pos) = 0; 98 virtual void SeekForward(int pos) = 0;
107 99
108 virtual void SetEnd(int pos) = 0;
109
110 // Scans the input as a regular expression pattern, previous character(s) must 100 // Scans the input as a regular expression pattern, previous character(s) must
111 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for 101 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for
112 // utf-8 newlines. 102 // utf-8 newlines.
113 virtual bool ScanRegExpPattern(bool seen_equal) = 0; 103 virtual bool ScanRegExpPattern(bool seen_equal) = 0;
114 104
115 // Returns true if regexp flags are scanned (always since flags can 105 // Returns true if regexp flags are scanned (always since flags can
116 // be empty). 106 // be empty).
117 virtual bool ScanRegExpFlags() = 0; 107 virtual bool ScanRegExpFlags() = 0;
118 108
119 // Returns the location of the last seen octal literal. 109 // Returns the location of the last seen octal literal.
120 virtual Location octal_position() const = 0; 110 virtual Location octal_position() const = 0;
121 111
122 virtual void clear_octal_position() = 0; 112 virtual void clear_octal_position() = 0;
123 113
124 // Returns true if there was a line terminator before the peek'ed token, 114 // Returns true if there was a line terminator before the peek'ed token,
125 // possibly inside a multi-line comment. 115 // possibly inside a multi-line comment.
126 bool HasAnyLineTerminatorBeforeNext() const { 116 bool HasAnyLineTerminatorBeforeNext() const {
127 return has_line_terminator_before_next_ || 117 return has_line_terminator_before_next_ ||
128 has_multiline_comment_before_next_; 118 has_multiline_comment_before_next_;
129 } 119 }
130 120
131 Handle<String> GetLiteralSymbol() { 121 Vector<const uint8_t> literal_one_byte_string() {
132 EnsureCurrentLiteralIsValid(); 122 EnsureCurrentLiteralIsValid();
133 return InternalizeLiteral(current_literal_); 123 return current_literal_->one_byte_string;
134 } 124 }
135 125
136 Handle<String> GetLiteralString(PretenureFlag tenured) { 126 Vector<const uint16_t> literal_two_byte_string() {
137 EnsureCurrentLiteralIsValid(); 127 EnsureCurrentLiteralIsValid();
138 return AllocateLiteral(current_literal_, tenured); 128 return current_literal_->two_byte_string;
139 }
140
141 Handle<String> GetNextLiteralString(PretenureFlag tenured) {
142 EnsureNextLiteralIsValid();
143 return AllocateLiteral(next_literal_, tenured);
144 }
145
146 Vector<const char> literal_ascii_string() {
147 EnsureCurrentLiteralIsValid();
148 return current_literal_->ascii_string;
149 }
150
151 Vector<const uc16> literal_utf16_string() {
152 EnsureCurrentLiteralIsValid();
153 return current_literal_->utf16_string;
154 } 129 }
155 130
156 int literal_length() { 131 int literal_length() {
157 EnsureCurrentLiteralIsValid(); 132 EnsureCurrentLiteralIsValid();
158 return current_literal_->length; 133 return current_literal_->length;
159 } 134 }
160 135
161 bool is_literal_ascii() { 136 bool is_literal_one_byte() {
162 EnsureCurrentLiteralIsValid(); 137 EnsureCurrentLiteralIsValid();
163 return current_literal_->is_ascii; 138 return current_literal_->is_one_byte;
164 } 139 }
165 140
166 bool is_literal_contextual_keyword(Vector<const char> keyword) { 141 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) {
167 if (!is_literal_ascii()) return false; 142 if (!is_literal_one_byte()) return false;
168 Vector<const char> literal = literal_ascii_string(); 143 Vector<const uint8_t> literal = literal_one_byte_string();
169 return literal.length() == keyword.length() && 144 return literal.length() == keyword.length() &&
170 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); 145 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
171 } 146 }
172 147
173 bool literal_contains_escapes() const { 148 bool literal_contains_escapes() const {
174 return current_.has_escapes; 149 return current_.has_escapes;
175 } 150 }
176 151
177 Vector<const char> next_literal_ascii_string() { 152 Vector<const uint8_t> next_literal_one_byte_string() {
178 EnsureNextLiteralIsValid(); 153 EnsureNextLiteralIsValid();
179 return next_literal_->ascii_string; 154 return next_literal_->one_byte_string;
180 } 155 }
181 156
182 Vector<const uc16> next_literal_utf16_string() { 157 Vector<const uint16_t> next_literal_two_byte_string() {
183 EnsureNextLiteralIsValid(); 158 EnsureNextLiteralIsValid();
184 return next_literal_->utf16_string; 159 return next_literal_->two_byte_string;
185 } 160 }
186 161
187 int next_literal_length() { 162 int next_literal_length() {
188 EnsureNextLiteralIsValid(); 163 EnsureNextLiteralIsValid();
189 return next_literal_->length; 164 return next_literal_->length;
190 } 165 }
191 166
192 bool is_next_literal_ascii() { 167 bool is_next_literal_one_byte() {
193 EnsureNextLiteralIsValid(); 168 EnsureNextLiteralIsValid();
194 return next_literal_->is_ascii; 169 return next_literal_->is_one_byte;
195 } 170 }
196 171
197 bool is_next_contextual_keyword(Vector<const char> keyword) { 172 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) {
198 if (!is_next_literal_ascii()) return false; 173 if (!is_next_literal_one_byte()) return false;
199 Vector<const char> literal = next_literal_ascii_string(); 174 Vector<const uint8_t> literal = next_literal_one_byte_string();
200 return literal.length() == keyword.length() && 175 return literal.length() == keyword.length() &&
201 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); 176 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
202 } 177 }
203 178
204 bool HarmonyScoping() const { 179 bool HarmonyScoping() const {
205 return harmony_scoping_; 180 return harmony_scoping_;
206 } 181 }
207 182
208 void SetHarmonyScoping(bool scoping) { 183 void SetHarmonyScoping(bool scoping) {
209 harmony_scoping_ = scoping; 184 harmony_scoping_ = scoping;
(...skipping 21 matching lines...) Expand all
231 struct TokenDesc { 206 struct TokenDesc {
232 Token::Value token; 207 Token::Value token;
233 int beg_pos; 208 int beg_pos;
234 int end_pos; 209 int end_pos;
235 bool has_escapes; 210 bool has_escapes;
236 bool is_onebyte; 211 bool is_onebyte;
237 }; 212 };
238 213
239 struct LiteralDesc { 214 struct LiteralDesc {
240 int beg_pos; 215 int beg_pos;
241 bool is_ascii; 216 bool is_one_byte;
242 bool is_in_buffer; 217 bool is_in_buffer;
243 int offset; 218 int offset;
244 int length; 219 int length;
245 Vector<const char> ascii_string; 220 Vector<const uint8_t> one_byte_string;
246 Vector<const uc16> utf16_string; 221 Vector<const uint16_t> two_byte_string;
247 LiteralBuffer buffer; 222 LiteralBuffer buffer;
248 LiteralDesc() : beg_pos(-1), is_ascii(false), is_in_buffer(false), 223 LiteralDesc() : beg_pos(-1), is_one_byte(false), is_in_buffer(false),
249 offset(0), length(0) { } 224 offset(0), length(0) { }
250 bool Valid(int pos) { return beg_pos == pos; } 225 bool Valid(int pos) { return beg_pos == pos; }
251 }; 226 };
252 227
253 virtual void Scan() = 0; 228 virtual void Scan() = 0;
254 229
255 virtual void UpdateBufferBasedOnHandle() = 0; 230 virtual void UpdateBufferBasedOnHandle() = 0;
256 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0; 231 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;
257 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0; 232 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;
258 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, 233 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 friend class Scanner; 271 friend class Scanner;
297 friend class LexerGCHandler; 272 friend class LexerGCHandler;
298 }; 273 };
299 274
300 275
301 template<typename Char> 276 template<typename Char>
302 class Lexer : public LexerBase { 277 class Lexer : public LexerBase {
303 public: 278 public:
304 Lexer(UnicodeCache* unicode_cache, 279 Lexer(UnicodeCache* unicode_cache,
305 Handle<String> source, 280 Handle<String> source,
306 int start_position_, 281 int start_position,
307 int end_position_); 282 int end_position);
308 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length); 283 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length);
309 virtual ~Lexer(); 284 virtual ~Lexer();
310 285
311 virtual void SeekForward(int pos); 286 virtual void SeekForward(int pos);
312 virtual void SetEnd(int pos);
313 virtual bool ScanRegExpPattern(bool seen_equal); 287 virtual bool ScanRegExpPattern(bool seen_equal);
314 virtual bool ScanRegExpFlags(); 288 virtual bool ScanRegExpFlags();
315 virtual Location octal_position() const; 289 virtual Location octal_position() const;
316 virtual void clear_octal_position() { last_octal_end_ = NULL; } 290 virtual void clear_octal_position() { last_octal_end_ = NULL; }
317 291
318 protected: 292 protected:
319 virtual void Scan(); 293 virtual void Scan();
320 294
321 const Char* GetNewBufferBasedOnHandle() const; 295 const Char* GetNewBufferBasedOnHandle() const;
322 virtual void UpdateBufferBasedOnHandle(); 296 virtual void UpdateBufferBasedOnHandle();
(...skipping 23 matching lines...) Expand all
346 320
347 // Returns true if the literal of the token can be represented as a 321 // Returns true if the literal of the token can be represented as a
348 // substring of the source. 322 // substring of the source.
349 bool IsSubstringOfSource(const TokenDesc& token); 323 bool IsSubstringOfSource(const TokenDesc& token);
350 324
351 bool CopyToLiteralBuffer(const Char* start, 325 bool CopyToLiteralBuffer(const Char* start,
352 const Char* end, 326 const Char* end,
353 const TokenDesc& token, 327 const TokenDesc& token,
354 LiteralDesc* literal); 328 LiteralDesc* literal);
355 329
330 // One of source_handle_ or source_ptr_ is set.
331 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed.
356 Isolate* isolate_; 332 Isolate* isolate_;
357 const Handle<String> source_handle_; 333 const Handle<String> source_handle_;
358 const Char* const source_ptr_; 334 const Char* const source_ptr_;
359 const int start_position_; 335 const int start_position_;
360 const int end_position_; 336 const int end_position_;
337 // Stream variables.
361 const Char* buffer_; 338 const Char* buffer_;
362 const Char* buffer_end_; 339 const Char* buffer_end_;
363 const Char* start_; 340 const Char* start_;
364 const Char* cursor_; 341 const Char* cursor_;
365
366 // Where we have seen the last octal number or an octal escape inside a 342 // Where we have seen the last octal number or an octal escape inside a
367 // string. Used by octal_position(). 343 // string. Used by octal_position().
368 const Char* last_octal_end_; 344 const Char* last_octal_end_;
369 }; 345 };
370 346
371 347
372 #ifdef V8_USE_GENERATED_LEXER 348 #ifdef V8_USE_GENERATED_LEXER
373 349
374 350
375 // Match old scanner interface. 351 // Match old scanner interface.
376 class Scanner { 352 class Scanner {
377 public: 353 public:
378 typedef LexerBase::Location Location; 354 typedef LexerBase::Location Location;
379 355
380 explicit Scanner(UnicodeCache* unicode_cache); 356 explicit Scanner(UnicodeCache* unicode_cache);
381 357
382 ~Scanner() { delete lexer_; } 358 ~Scanner() { delete lexer_; }
383 359
384 void Initialize(Utf16CharacterStream* source); 360 void Initialize(Utf16CharacterStream* source);
385 361
386 inline void SeekForward(int pos) { lexer_->SeekForward(pos); } 362 inline void SeekForward(int pos) { lexer_->SeekForward(pos); }
387 363
388 inline void SetEnd(int pos) { lexer_->SetEnd(pos); }
389
390 inline bool ScanRegExpPattern(bool seen_equal) { 364 inline bool ScanRegExpPattern(bool seen_equal) {
391 return lexer_->ScanRegExpPattern(seen_equal); 365 return lexer_->ScanRegExpPattern(seen_equal);
392 } 366 }
393 367
394 inline bool ScanRegExpFlags() { return lexer_->ScanRegExpFlags(); } 368 inline bool ScanRegExpFlags() { return lexer_->ScanRegExpFlags(); }
395 369
396 inline Location octal_position() const { return lexer_->octal_position(); } 370 inline Location octal_position() const { return lexer_->octal_position(); }
397 371
398 inline void clear_octal_position() { lexer_->clear_octal_position(); } 372 inline void clear_octal_position() { lexer_->clear_octal_position(); }
399 373
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
433 407
434 inline void SetHarmonyNumericLiterals(bool numeric_literals) { 408 inline void SetHarmonyNumericLiterals(bool numeric_literals) {
435 harmony_numeric_literals_ = numeric_literals; 409 harmony_numeric_literals_ = numeric_literals;
436 SyncSettings(); 410 SyncSettings();
437 } 411 }
438 412
439 inline bool HasAnyLineTerminatorBeforeNext() const { 413 inline bool HasAnyLineTerminatorBeforeNext() const {
440 return lexer_->HasAnyLineTerminatorBeforeNext(); 414 return lexer_->HasAnyLineTerminatorBeforeNext();
441 } 415 }
442 416
443 inline Handle<String> GetLiteralSymbol() {
444 return lexer_->GetLiteralSymbol();
445 }
446
447 inline Handle<String> GetLiteralString(PretenureFlag tenured) {
448 return lexer_->GetLiteralString(tenured);
449 }
450
451 inline Handle<String> GetNextLiteralString(PretenureFlag tenured) {
452 return lexer_->GetNextLiteralString(tenured);
453 }
454
455 inline Vector<const char> literal_ascii_string() { 417 inline Vector<const char> literal_ascii_string() {
456 return lexer_->literal_ascii_string(); 418 return Vector<const char>::cast(lexer_->literal_one_byte_string());
457 } 419 }
458 420
459 inline Vector<const uc16> literal_utf16_string() { 421 inline Vector<const uc16> literal_utf16_string() {
460 return lexer_->literal_utf16_string(); 422 return lexer_->literal_two_byte_string();
461 } 423 }
462 424
463 inline int literal_length() { 425 inline int literal_length() {
464 return lexer_->literal_length(); 426 return lexer_->literal_length();
465 } 427 }
466 428
467 inline bool is_literal_ascii() { 429 inline bool is_literal_ascii() {
468 return lexer_->is_literal_ascii(); 430 return lexer_->is_literal_one_byte();
469 } 431 }
470 432
471 inline bool is_literal_contextual_keyword(Vector<const char> keyword) { 433 inline bool is_literal_contextual_keyword(
472 return lexer_->is_literal_contextual_keyword(keyword); 434 Vector<const char>& keyword) { // NOLINT
435 return lexer_->is_literal_contextual_keyword(
436 Vector<const uint8_t>::cast(keyword));
473 } 437 }
474 438
475 inline bool literal_contains_escapes() const { 439 inline bool literal_contains_escapes() const {
476 return lexer_->literal_contains_escapes(); 440 return lexer_->literal_contains_escapes();
477 } 441 }
478 442
479 inline Vector<const char> next_literal_ascii_string() { 443 inline Vector<const char> next_literal_ascii_string() {
480 return lexer_->next_literal_ascii_string(); 444 return Vector<const char>::cast(lexer_->next_literal_one_byte_string());
481 } 445 }
482 446
483 inline Vector<const uc16> next_literal_utf16_string() { 447 inline Vector<const uc16> next_literal_utf16_string() {
484 return lexer_->next_literal_utf16_string(); 448 return lexer_->next_literal_two_byte_string();
485 } 449 }
486 450
487 inline int next_literal_length() { 451 inline int next_literal_length() {
488 return lexer_->next_literal_length(); 452 return lexer_->next_literal_length();
489 } 453 }
490 454
491 inline bool is_next_literal_ascii() { 455 inline bool is_next_literal_ascii() {
492 return lexer_->is_next_literal_ascii(); 456 return lexer_->is_next_literal_one_byte();
493 } 457 }
494 458
495 inline bool is_next_contextual_keyword(Vector<const char> keyword) { 459 inline bool is_next_contextual_keyword(
496 return lexer_->is_next_contextual_keyword(keyword); 460 Vector<const char>& keyword) { // NOLINT
461 return lexer_->is_next_contextual_keyword(
462 Vector<const uint8_t>::cast(keyword));
497 } 463 }
498 464
499 private: 465 private:
500 void SyncSettings(); 466 void SyncSettings();
501 467
502 UnicodeCache* unicode_cache_; 468 UnicodeCache* unicode_cache_;
503 LexerBase* lexer_; 469 LexerBase* lexer_;
504 bool harmony_numeric_literals_; 470 bool harmony_numeric_literals_;
505 bool harmony_modules_; 471 bool harmony_modules_;
506 bool harmony_scoping_; 472 bool harmony_scoping_;
507 }; 473 };
508 474
509 475
510 #endif 476 #endif
511 477
512 478
513 } } 479 } }
514 480
515 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H 481 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H
OLDNEW
« no previous file with comments | « no previous file | src/lexer/lexer.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698