Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(474)

Side by Side Diff: src/lexer/experimental-scanner.h

Issue 140913009: Experimental lexer: fix internalization and allocation of literals. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: minor fixes Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
150 harmony_numeric_literals_ = numeric_literals; 150 harmony_numeric_literals_ = numeric_literals;
151 } 151 }
152 152
153 // Returns true if there was a line terminator before the peek'ed token, 153 // Returns true if there was a line terminator before the peek'ed token,
154 // possibly inside a multi-line comment. 154 // possibly inside a multi-line comment.
155 bool HasAnyLineTerminatorBeforeNext() const { 155 bool HasAnyLineTerminatorBeforeNext() const {
156 return has_line_terminator_before_next_ || 156 return has_line_terminator_before_next_ ||
157 has_multiline_comment_before_next_; 157 has_multiline_comment_before_next_;
158 } 158 }
159 159
160 Handle<String> GetLiteralSymbol() {
161 EnsureCurrentLiteralIsValid();
162 return InternalizeLiteral(current_literal_);
163 }
164
165 Handle<String> GetLiteralString(PretenureFlag tenured) {
166 EnsureCurrentLiteralIsValid();
167 return AllocateLiteral(current_literal_, tenured);
168 }
169
170 Handle<String> GetNextLiteralString(PretenureFlag tenured) {
171 EnsureNextLiteralIsValid();
172 return AllocateLiteral(next_literal_, tenured);
173 }
174
160 Vector<const char> literal_ascii_string() { 175 Vector<const char> literal_ascii_string() {
161 if (!current_literal_->Valid(current_.beg_pos)) { 176 EnsureCurrentLiteralIsValid();
162 FillLiteral(current_, current_literal_);
163 }
164 return current_literal_->ascii_string; 177 return current_literal_->ascii_string;
165 } 178 }
166 179
167 Vector<const uc16> literal_utf16_string() { 180 Vector<const uc16> literal_utf16_string() {
168 if (!current_literal_->Valid(current_.beg_pos)) { 181 EnsureCurrentLiteralIsValid();
169 FillLiteral(current_, current_literal_);
170 }
171 return current_literal_->utf16_string; 182 return current_literal_->utf16_string;
172 } 183 }
173 184
174 int literal_length() { 185 int literal_length() {
175 if (!current_literal_->Valid(current_.beg_pos)) { 186 EnsureCurrentLiteralIsValid();
176 FillLiteral(current_, current_literal_);
177 }
178 return current_literal_->length; 187 return current_literal_->length;
179 } 188 }
180 189
181 // This should be is_onebyte or is_latin1; it doesn't mean ASCII for real. 190 // This should be is_onebyte or is_latin1; it doesn't mean ASCII for real.
182 bool is_literal_ascii() { 191 bool is_literal_ascii() {
183 if (!current_literal_->Valid(current_.beg_pos)) { 192 EnsureCurrentLiteralIsValid();
184 FillLiteral(current_, current_literal_);
185 }
186 return current_literal_->is_ascii; 193 return current_literal_->is_ascii;
187 } 194 }
188 195
189 bool is_literal_contextual_keyword(Vector<const char> keyword) { 196 bool is_literal_contextual_keyword(Vector<const char> keyword) {
190 if (!is_literal_ascii()) return false; 197 if (!is_literal_ascii()) return false;
191 Vector<const char> literal = literal_ascii_string(); 198 Vector<const char> literal = literal_ascii_string();
192 return literal.length() == keyword.length() && 199 return literal.length() == keyword.length() &&
193 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); 200 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
194 } 201 }
195 202
196 bool literal_contains_escapes() const { 203 bool literal_contains_escapes() const {
197 return current_.has_escapes; 204 return current_.has_escapes;
198 } 205 }
199 206
200 Vector<const char> next_literal_ascii_string() { 207 Vector<const char> next_literal_ascii_string() {
201 if (!next_literal_->Valid(next_.beg_pos)) { 208 EnsureNextLiteralIsValid();
202 FillLiteral(next_, next_literal_);
203 }
204 return next_literal_->ascii_string; 209 return next_literal_->ascii_string;
205 } 210 }
206 211
207 Vector<const uc16> next_literal_utf16_string() { 212 Vector<const uc16> next_literal_utf16_string() {
208 if (!next_literal_->Valid(next_.beg_pos)) { 213 EnsureNextLiteralIsValid();
209 FillLiteral(next_, next_literal_);
210 }
211 return next_literal_->utf16_string; 214 return next_literal_->utf16_string;
212 } 215 }
213 216
214 int next_literal_length() { 217 int next_literal_length() {
215 if (!next_literal_->Valid(next_.beg_pos)) { 218 EnsureNextLiteralIsValid();
216 FillLiteral(next_, next_literal_);
217 }
218 return next_literal_->length; 219 return next_literal_->length;
219 } 220 }
220 221
221 bool is_next_literal_ascii() { 222 bool is_next_literal_ascii() {
222 if (!next_literal_->Valid(next_.beg_pos)) { 223 EnsureNextLiteralIsValid();
223 FillLiteral(next_, next_literal_);
224 }
225 return next_literal_->is_ascii; 224 return next_literal_->is_ascii;
226 } 225 }
227 226
228 bool is_next_contextual_keyword(Vector<const char> keyword) { 227 bool is_next_contextual_keyword(Vector<const char> keyword) {
229 if (!is_next_literal_ascii()) return false; 228 if (!is_next_literal_ascii()) return false;
230 Vector<const char> literal = next_literal_ascii_string(); 229 Vector<const char> literal = next_literal_ascii_string();
231 return literal.length() == keyword.length() && 230 return literal.length() == keyword.length() &&
232 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); 231 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
233 } 232 }
234 233
235 protected: 234 protected:
236 struct TokenDesc { 235 struct TokenDesc {
237 Token::Value token; 236 Token::Value token;
238 int beg_pos; 237 int beg_pos;
239 int end_pos; 238 int end_pos;
240 bool has_escapes; 239 bool has_escapes;
241 }; 240 };
242 241
243 struct LiteralDesc { 242 struct LiteralDesc {
244 int beg_pos; 243 int beg_pos;
245 bool is_ascii; 244 bool is_ascii;
245 bool is_in_buffer;
246 int offset;
246 int length; 247 int length;
247 Vector<const char> ascii_string; 248 Vector<const char> ascii_string;
248 Vector<const uc16> utf16_string; 249 Vector<const uc16> utf16_string;
249 LiteralBuffer buffer; 250 LiteralBuffer buffer;
250 LiteralDesc() : beg_pos(-1), is_ascii(false), length(0) { } 251 LiteralDesc() : beg_pos(-1), is_ascii(false), is_in_buffer(false),
252 offset(0), length(0) { }
251 bool Valid(int pos) { return beg_pos == pos; } 253 bool Valid(int pos) { return beg_pos == pos; }
252 }; 254 };
253 255
254 virtual void Scan() = 0; 256 virtual void Scan() = 0;
255 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0; 257 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;
256 258
257 void ResetLiterals() { 259 void ResetLiterals() {
258 current_literal_->beg_pos = -1; 260 if (!current_literal_->is_in_buffer) current_literal_->beg_pos = -1;
259 next_literal_->beg_pos = -1; 261 if (!next_literal_->is_in_buffer) next_literal_->beg_pos = -1;
marja 2014/01/20 07:54:33 Shouldn't we reset is_in_buffer here too? Why not?
ulan 2014/01/20 09:15:00 beg_pos == -1 indicates an invalid buffer, for inv… [reply truncated in the collapsed comment view]
260 } 262 }
261 263
264 void EnsureCurrentLiteralIsValid() {
265 if (!current_literal_->Valid(current_.beg_pos)) {
266 FillLiteral(current_, current_literal_);
267 }
268 }
269
270 void EnsureNextLiteralIsValid() {
271 if (!next_literal_->Valid(next_.beg_pos)) {
272 FillLiteral(next_, next_literal_);
273 }
274 }
275
276 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;
277 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
278 PretenureFlag tenured) = 0;
279
262 Isolate* isolate_; 280 Isolate* isolate_;
263 UnicodeCache* unicode_cache_; 281 UnicodeCache* unicode_cache_;
264 282
265 bool has_line_terminator_before_next_; 283 bool has_line_terminator_before_next_;
266 // Whether there is a multiline comment *with a line break* before the next 284 // Whether there is a multiline comment *with a line break* before the next
267 // token. 285 // token.
268 bool has_multiline_comment_before_next_; 286 bool has_multiline_comment_before_next_;
269 287
270 TokenDesc current_; // desc for current token (as returned by Next()) 288 TokenDesc current_; // desc for current token (as returned by Next())
271 TokenDesc next_; // desc for next token (one token look-ahead) 289 TokenDesc next_; // desc for next token (one token look-ahead)
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
332 ResetLiterals(); 350 ResetLiterals();
333 } 351 }
334 } 352 }
335 353
336 protected: 354 protected:
337 virtual void Scan(); 355 virtual void Scan();
338 356
339 const Char* GetNewBufferBasedOnHandle() const; 357 const Char* GetNewBufferBasedOnHandle() const;
340 358
341 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal); 359 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);
360 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal);
361 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
362 PretenureFlag tenured);
363
342 364
343 private: 365 private:
344 bool ValidIdentifierPart() { 366 bool ValidIdentifierPart() {
345 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4)); 367 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));
346 } 368 }
347 369
348 bool ValidIdentifierStart() { 370 bool ValidIdentifierStart() {
349 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); 371 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));
350 } 372 }
351 373
352 uc32 ScanHexNumber(int length); 374 uc32 ScanHexNumber(int length);
353 bool ScanLiteralUnicodeEscape(); 375 bool ScanLiteralUnicodeEscape();
354 376
355 const Char* ScanHexNumber(const Char* start, 377 const Char* ScanHexNumber(const Char* start,
356 const Char* end, 378 const Char* end,
357 uc32* result); 379 uc32* result);
358 const Char* ScanOctalEscape(const Char* start, 380 const Char* ScanOctalEscape(const Char* start,
359 const Char* end, 381 const Char* end,
360 uc32* result); 382 uc32* result);
361 const Char* ScanIdentifierUnicodeEscape(const Char* start, 383 const Char* ScanIdentifierUnicodeEscape(const Char* start,
362 const Char* end, 384 const Char* end,
363 uc32* result); 385 uc32* result);
364 const Char* ScanEscape(const Char* start, 386 const Char* ScanEscape(const Char* start,
365 const Char* end, 387 const Char* end,
366 LiteralBuffer* literal); 388 LiteralBuffer* literal);
367 389
390 bool IsSubstringOfSource(const TokenDesc& token);
391
392 bool CopyToLiteralBuffer(const Char* start,
393 const Char* end,
394 const TokenDesc& token,
395 LiteralDesc* literal);
396
368 Handle<String> source_handle_; 397 Handle<String> source_handle_;
369 const Char* buffer_; 398 const Char* buffer_;
370 const Char* buffer_end_; 399 const Char* buffer_end_;
371 const Char* start_; 400 const Char* start_;
372 const Char* cursor_; 401 const Char* cursor_;
373 402
374 // Where we have seen the last octal number or an octal escape inside a 403 // Where we have seen the last octal number or an octal escape inside a
375 // string. Used by octal_position(). 404 // string. Used by octal_position().
376 const Char* last_octal_end_; 405 const Char* last_octal_end_;
377 }; 406 };
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
606 // character. 635 // character.
607 const Char* temp_cursor = last_octal_end_ - 1; 636 const Char* temp_cursor = last_octal_end_ - 1;
608 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') 637 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7')
609 --temp_cursor; 638 --temp_cursor;
610 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_); 639 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);
611 } 640 }
612 641
613 } } 642 } }
614 643
615 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H 644 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698