Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(445)

Side by Side Diff: src/lexer/lexer.cc

Issue 187603004: Experimental parser: make utf8 sort of work (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/lexer/lexer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
132 void LexerGCHandler::UpdateLexersAfterGC() { 132 void LexerGCHandler::UpdateLexersAfterGC() {
133 typedef std::set<LexerBase*>::const_iterator It; 133 typedef std::set<LexerBase*>::const_iterator It;
134 for (It it = lexers_.begin(); it != lexers_.end(); ++it) { 134 for (It it = lexers_.begin(); it != lexers_.end(); ++it) {
135 (*it)->UpdateBufferBasedOnHandle(); 135 (*it)->UpdateBufferBasedOnHandle();
136 } 136 }
137 } 137 }
138 138
139 139
140 LexerBase::LexerBase(UnicodeCache* unicode_cache) 140 LexerBase::LexerBase(UnicodeCache* unicode_cache)
141 : unicode_cache_(unicode_cache), 141 : unicode_cache_(unicode_cache),
142 current_literal_(&literals_[0]),
143 next_literal_(&literals_[1]),
142 has_line_terminator_before_next_(true), 144 has_line_terminator_before_next_(true),
143 has_multiline_comment_before_next_(false), 145 has_multiline_comment_before_next_(false),
144 current_literal_(&literals_[0]),
145 next_literal_(&literals_[1]),
146 harmony_numeric_literals_(false), 146 harmony_numeric_literals_(false),
147 harmony_modules_(false), 147 harmony_modules_(false),
148 harmony_scoping_(false) { 148 harmony_scoping_(false) {
149 } 149 }
150 150
151 151
152 LexerBase::~LexerBase() {} 152 LexerBase::~LexerBase() {}
153 153
154 154
155 // Returns the next token and advances input. 155 // Returns the next token and advances input.
156 Token::Value LexerBase::Next() { 156 Token::Value LexerBase::Next() {
157 has_line_terminator_before_next_ = false; 157 has_line_terminator_before_next_ = false;
158 has_multiline_comment_before_next_ = false; 158 has_multiline_comment_before_next_ = false;
159 current_ = next_; 159 current_ = next_;
160 std::swap(current_literal_, next_literal_); 160 std::swap(current_literal_, next_literal_);
161 Scan(); 161 Scan();
162 return current_.token; 162 return current_.token;
163 } 163 }
164 164
165 165
166 template<typename Char> 166 template<typename Char>
167 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, 167 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
168 const Char* source_ptr, 168 const Char* source_ptr,
169 int length) 169 int length)
170 : LexerBase(unicode_cache), 170 : LexerBase(unicode_cache),
171 isolate_(NULL), 171 isolate_(NULL),
172 source_ptr_(source_ptr), 172 source_ptr_(source_ptr),
173 start_position_(0),
174 end_position_(length), 173 end_position_(length),
175 buffer_(NULL), 174 buffer_(source_ptr),
176 buffer_end_(NULL), 175 buffer_end_(source_ptr + length),
177 start_(NULL), 176 start_(source_ptr),
178 cursor_(NULL), 177 cursor_(source_ptr),
179 last_octal_end_(NULL) { 178 last_octal_end_(NULL) {
180 CHECK(false); // not yet supported 179 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
181 } 180 }
182 181
183 182
184 template<typename Char> 183 template<typename Char>
185 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, 184 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,
186 Handle<String> source, 185 Handle<String> source,
187 int start_position, 186 int start_position,
188 int end_position) 187 int end_position)
189 : LexerBase(unicode_cache), 188 : LexerBase(unicode_cache),
190 isolate_(source->GetIsolate()), 189 isolate_(source->GetIsolate()),
191 source_handle_(FlattenGetString(source)), 190 source_handle_(FlattenGetString(source)),
192 source_ptr_(NULL), 191 source_ptr_(NULL),
193 start_position_(start_position),
194 end_position_(end_position), 192 end_position_(end_position),
195 buffer_(NULL), 193 buffer_(NULL),
196 buffer_end_(NULL), 194 buffer_end_(NULL),
197 start_(NULL), 195 start_(NULL),
198 cursor_(NULL), 196 cursor_(NULL),
199 last_octal_end_(NULL) { 197 last_octal_end_(NULL) {
198 cursor_ += start_position;
200 UpdateBufferBasedOnHandle(); 199 UpdateBufferBasedOnHandle();
200 isolate_->lexer_gc_handler()->AddLexer(this);
201 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0; 201 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
202 isolate_->lexer_gc_handler()->AddLexer(this);
203 // TODO(dcarney): move this to UpdateBufferBasedOnHandle
204 cursor_ = buffer_ + start_position;
205 buffer_end_ = buffer_ + end_position;
206 start_ = cursor_;
207 } 202 }
208 203
209 204
210 template<typename Char> 205 template<typename Char>
211 Lexer<Char>::~Lexer() { 206 Lexer<Char>::~Lexer() {
212 if (!source_handle_.is_null()) { 207 if (!source_handle_.is_null()) {
213 isolate_->lexer_gc_handler()->RemoveLexer(this); 208 isolate_->lexer_gc_handler()->RemoveLexer(this);
214 } 209 }
215 } 210 }
216 211
217 212
213 // TODO(dcarney): utf8 handling
218 template<typename Char> 214 template<typename Char>
219 void Lexer<Char>::SeekForward(int pos) { 215 void Lexer<Char>::SeekForward(int pos) {
216 // TODO(dcarney): utf8 handling
220 cursor_ = buffer_ + pos; 217 cursor_ = buffer_ + pos;
221 start_ = cursor_; 218 start_ = cursor_;
222 has_line_terminator_before_next_ = false; 219 has_line_terminator_before_next_ = false;
223 has_multiline_comment_before_next_ = false; 220 has_multiline_comment_before_next_ = false;
224 Scan(); // Fills in next_. 221 Scan();
225 } 222 }
226 223
227 224
225 // TODO(dcarney): utf8 handling
228 template<typename Char> 226 template<typename Char>
229 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) { 227 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) {
230 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 228 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
231 bool in_character_class = false; 229 bool in_character_class = false;
232 230
233 // Previous token is either '/' or '/=', in the second case, the 231 // Previous token is either '/' or '/=', in the second case, the
234 // pattern starts at =. 232 // pattern starts at =.
235 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); 233 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0);
236 234
237 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 235 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
(...skipping 24 matching lines...) Expand all
262 if (*cursor_ == ']') in_character_class = false; 260 if (*cursor_ == ']') in_character_class = false;
263 if (++cursor_ >= buffer_end_) return false; 261 if (++cursor_ >= buffer_end_) return false;
264 } 262 }
265 } 263 }
266 next_.end_pos = (cursor_ - buffer_); 264 next_.end_pos = (cursor_ - buffer_);
267 ++cursor_; // consume '/' 265 ++cursor_; // consume '/'
268 return true; 266 return true;
269 } 267 }
270 268
271 269
270 // TODO(dcarney): utf8 handling
272 template<typename Char> 271 template<typename Char>
273 bool Lexer<Char>::ScanRegExpFlags() { 272 bool Lexer<Char>::ScanRegExpFlags() {
274 next_.beg_pos = cursor_ - buffer_; 273 next_.beg_pos = cursor_ - buffer_;
275 // Scan regular expression flags. 274 // Scan regular expression flags.
276 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) { 275 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) {
277 if (*cursor_ != '\\') { 276 if (*cursor_ != '\\') {
278 if (++cursor_ >= buffer_end_) break; 277 if (++cursor_ >= buffer_end_) break;
279 } else { 278 } else {
280 if (!ScanLiteralUnicodeEscape()) break; 279 if (!ScanLiteralUnicodeEscape()) break;
281 if (++cursor_ >= buffer_end_) break; 280 if (++cursor_ >= buffer_end_) break;
(...skipping 13 matching lines...) Expand all
295 if (d < 0) { 294 if (d < 0) {
296 return -1; 295 return -1;
297 } 296 }
298 x = x * 16 + d; 297 x = x * 16 + d;
299 } 298 }
300 return x; 299 return x;
301 } 300 }
302 301
303 302
304 template<typename Char> 303 template<typename Char>
305 const Char* Lexer<Char>::ScanHexNumber( 304 static const Char* ScanHexNumber(
306 const Char* cursor, const Char* end, uc32* result) { 305 const Char* cursor, const Char* end, uc32* result) {
307 uc32 x = 0; 306 uc32 x = 0;
308 for ( ; cursor < end; ++cursor) { 307 for ( ; cursor < end; ++cursor) {
309 int d = HexValue(*cursor); 308 int d = HexValue(*cursor);
310 if (d < 0) { 309 if (d < 0) {
311 *result = -1; 310 *result = -1;
312 return NULL; 311 return NULL;
313 } 312 }
314 x = x * 16 + d; 313 x = x * 16 + d;
315 } 314 }
316 *result = x; 315 *result = x;
317 return cursor; 316 return cursor;
318 } 317 }
319 318
320 319
321 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 320 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
322 // ECMA-262. Other JS VMs support them. 321 // ECMA-262. Other JS VMs support them.
323 template<typename Char> 322 template<typename Char>
324 const Char* Lexer<Char>::ScanOctalEscape( 323 static const Char* ScanOctalEscape(
325 const Char* start, const Char* end, uc32* result) { 324 const Char* start, const Char* end, uc32* result) {
326 uc32 x = *result - '0'; 325 uc32 x = *result - '0';
327 const Char* cursor; 326 const Char* cursor;
328 for (cursor = start; cursor < end; cursor++) { 327 for (cursor = start; cursor < end; cursor++) {
329 int d = *cursor - '0'; 328 int d = *cursor - '0';
330 if (d < 0 || d > 7) break; 329 if (d < 0 || d > 7) break;
331 int nx = x * 8 + d; 330 int nx = x * 8 + d;
332 if (nx >= 256) break; 331 if (nx >= 256) break;
333 x = nx; 332 x = nx;
334 } 333 }
335 *result = x; 334 *result = x;
336 return cursor; 335 return cursor;
337 } 336 }
338 337
339 338
339 // TODO(dcarney): utf8 handling
340 template<typename Char> 340 template<typename Char>
341 bool Lexer<Char>::ScanLiteralUnicodeEscape() { 341 bool Lexer<Char>::ScanLiteralUnicodeEscape() {
342 ASSERT(cursor_ < buffer_end_); 342 ASSERT(cursor_ < buffer_end_);
343 Char primary_char = *(cursor_); 343 Char primary_char = *(cursor_);
344 ASSERT(primary_char == '\\'); 344 ASSERT(primary_char == '\\');
345 if (++cursor_ >= buffer_end_) return false; 345 if (++cursor_ >= buffer_end_) return false;
346 primary_char = *(cursor_); 346 primary_char = *(cursor_);
347 int i = 1; 347 int i = 1;
348 if (primary_char == 'u') { 348 if (primary_char == 'u') {
349 i++; 349 i++;
350 while (i < 6) { 350 while (i < 6) {
351 if (++cursor_ >= buffer_end_) return false; 351 if (++cursor_ >= buffer_end_) return false;
352 primary_char = *(cursor_); 352 primary_char = *(cursor_);
353 if (!IsHexDigit(primary_char)) break; 353 if (!IsHexDigit(primary_char)) break;
354 i++; 354 i++;
355 } 355 }
356 } 356 }
357 return i == 6; 357 return i == 6;
358 } 358 }
359 359
360 360
361 template<typename Char> 361 template<typename Char>
362 const Char* Lexer<Char>::ScanIdentifierUnicodeEscape( 362 static const Char* ScanIdentifierUnicodeEscape(
363 const Char* cursor, const Char* end, uc32* result) { 363 const Char* cursor, const Char* end, uc32* result) {
364 ASSERT(*cursor == '\\'); 364 ASSERT(*cursor == '\\');
365 if (++cursor >= end) return NULL; 365 if (++cursor >= end) return NULL;
366 if (*cursor != 'u') return NULL; 366 if (*cursor != 'u') return NULL;
367 ++cursor; 367 ++cursor;
368 if (cursor + 4 > end) return NULL; 368 if (cursor + 4 > end) return NULL;
369 cursor = ScanHexNumber(cursor, cursor + 4, result); 369 cursor = ScanHexNumber(cursor, cursor + 4, result);
370 return cursor; 370 return cursor;
371 } 371 }
372 372
373 373
374 template<typename Char> 374 template<typename Char>
375 const Char* Lexer<Char>::ScanEscape( 375 static const Char* ScanEscape(UnicodeCache* cache,
376 const Char* cursor, const Char* end, LiteralBuffer* literal) { 376 const Char* cursor,
377 const Char* end,
378 LiteralBuffer* literal) {
377 ASSERT(*cursor == '\\'); 379 ASSERT(*cursor == '\\');
378 if (++cursor >= end) return NULL; 380 if (++cursor >= end) return NULL;
379 uc32 c = *cursor; 381 uc32 c = *cursor;
380 if (++cursor > end) return NULL; 382 if (++cursor > end) return NULL;
381 // Skip escaped newlines. 383 // Skip escaped newlines.
382 if (unicode_cache_->IsLineTerminator(c)) { 384 if (cache->IsLineTerminator(c)) {
383 uc32 peek = *cursor; 385 uc32 peek = *cursor;
384 // Allow CR+LF newlines in multiline string literals. 386 // Allow CR+LF newlines in multiline string literals.
385 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++; 387 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;
386 // Allow LF+CR newlines in multiline string literals. 388 // Allow LF+CR newlines in multiline string literals.
387 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++; 389 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++;
388 return cursor; 390 return cursor;
389 } 391 }
390 392
391 switch (c) { 393 switch (c) {
392 case '\'': // fall through 394 case '\'': // fall through
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
425 // According to ECMA-262, section 7.8.4, characters not covered by the 427 // According to ECMA-262, section 7.8.4, characters not covered by the
426 // above cases should be illegal, but they are commonly handled as 428 // above cases should be illegal, but they are commonly handled as
427 // non-escaped characters by JS VMs. 429 // non-escaped characters by JS VMs.
428 literal->AddChar(c); 430 literal->AddChar(c);
429 return cursor; 431 return cursor;
430 } 432 }
431 433
432 434
433 template<typename Char> 435 template<typename Char>
434 LexerBase::Location Lexer<Char>::octal_position() const { 436 LexerBase::Location Lexer<Char>::octal_position() const {
435 if (!last_octal_end_) 437 if (!last_octal_end_) return Location::invalid();
436 return Location::invalid();
437 // The last octal might be an octal escape or an octal number. Whichever it 438 // The last octal might be an octal escape or an octal number. Whichever it
438 // is, we'll find the start by just scanning back until we hit a non-octal 439 // is, we'll find the start by just scanning back until we hit a non-octal
439 // character. 440 // character.
440 const Char* temp_cursor = last_octal_end_ - 1; 441 const Char* temp_cursor = last_octal_end_ - 1;
441 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') 442 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') {
442 --temp_cursor; 443 --temp_cursor;
444 }
443 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_); 445 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);
444 } 446 }
445 447
446 448
447 template<> 449 template<>
448 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const { 450 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const {
449 String::FlatContent content = source_handle_->GetFlatContent(); 451 String::FlatContent content = source_handle_->GetFlatContent();
450 return content.ToOneByteVector().start(); 452 return content.ToOneByteVector().start();
451 } 453 }
452 454
(...skipping 17 matching lines...) Expand all
470 void Lexer<Char>::UpdateBufferBasedOnHandle() { 472 void Lexer<Char>::UpdateBufferBasedOnHandle() {
471 // We get a raw pointer from the Handle, but we also update it every time 473 // We get a raw pointer from the Handle, but we also update it every time
472 // there is a GC, so it is safe. 474 // there is a GC, so it is safe.
473 DisallowHeapAllocation no_gc; 475 DisallowHeapAllocation no_gc;
474 const Char* new_buffer = GetNewBufferBasedOnHandle(); 476 const Char* new_buffer = GetNewBufferBasedOnHandle();
475 if (new_buffer != buffer_) { 477 if (new_buffer != buffer_) {
476 int start_offset = start_ - buffer_; 478 int start_offset = start_ - buffer_;
477 int cursor_offset = cursor_ - buffer_; 479 int cursor_offset = cursor_ - buffer_;
478 int last_octal_end_offset = last_octal_end_ - buffer_; 480 int last_octal_end_offset = last_octal_end_ - buffer_;
479 buffer_ = new_buffer; 481 buffer_ = new_buffer;
480 buffer_end_ = buffer_ + source_handle_->length(); 482 buffer_end_ = buffer_ + end_position_;
481 start_ = buffer_ + start_offset; 483 start_ = buffer_ + start_offset;
482 cursor_ = buffer_ + cursor_offset; 484 cursor_ = buffer_ + cursor_offset;
483 if (last_octal_end_ != NULL) { 485 if (last_octal_end_ != NULL) {
484 last_octal_end_ = buffer_ + last_octal_end_offset; 486 last_octal_end_ = buffer_ + last_octal_end_offset;
485 } 487 }
486 ResetLiterals(); 488 current_literal_->Invalidate();
489 next_literal_->Invalidate();
487 } 490 }
488 } 491 }
489 492
490 493
491 template<> 494 void LexerBase::LiteralDesc::SetOneByteString(
492 bool Lexer<uint8_t>::IsSubstringOfSource(const TokenDesc& token) { 495 Vector<const uint8_t> string, bool owned) {
493 return !token.has_escapes; 496 is_in_buffer_ = false;
497 if (is_one_byte_string_owned_) {
498 one_byte_string_.Dispose();
499 }
500 is_one_byte_string_owned_ = owned;
501 is_one_byte_ = true;
502 one_byte_string_ = string;
503 }
504
505
506 void LexerBase::LiteralDesc::SetTwoByteString(Vector<const uint16_t> string) {
507 is_in_buffer_ = false;
508 is_one_byte_ = false;
509 two_byte_string_ = string;
510 }
511
512
513 void LexerBase::LiteralDesc::SetStringFromLiteralBuffer() {
514 is_one_byte_ = buffer.is_ascii();
515 is_in_buffer_ = true;
516 length = buffer.length();
517 if (is_one_byte_) {
518 if (is_one_byte_string_owned_) {
519 one_byte_string_.Dispose();
520 }
521 is_one_byte_string_owned_ = false;
522 one_byte_string_ = Vector<const uint8_t>::cast(buffer.ascii_literal());
523 } else {
524 two_byte_string_ = buffer.utf16_literal();
525 }
526 }
527
528
529 static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) {
530 return true;
531 }
532
533
534 static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) {
535 uint16_t acc = 0;
536 while (cursor != end) {
537 acc |= *cursor++ >> 8;
538 }
539 return acc == 0;
540 }
541
542
543 static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) {
544 int8_t acc = 0;
545 while (cursor != end) {
546 acc |= *cursor++ >> 7;
547 }
548 return acc == 0;
494 } 549 }
495 550
496 551
497 template<> 552 template<>
498 bool Lexer<uint16_t>::IsSubstringOfSource( 553 template<>
499 const TokenDesc& token) { 554 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor,
500 if (token.has_escapes) return false; 555 const uint16_t* end,
501 const uint16_t* start = buffer_ + token.beg_pos; 556 LiteralDesc* literal) {
502 const uint16_t* end = buffer_ + token.end_pos; 557 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length);
503 for (const uint16_t* cursor = start; cursor != end; ++cursor) { 558 uint8_t* data = vector.start();
504 if (*cursor >= unibrow::Latin1::kMaxChar) return true; 559 while (cursor < end) {
560 *data++ = *cursor++;
505 } 561 }
506 return false; 562 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true);
507 } 563 }
508 564
509 565
510 template<> 566 template<>
511 bool Lexer<int8_t>::IsSubstringOfSource(const TokenDesc& token) { 567 template<>
512 // FIXME: implement. 568 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start,
513 UNREACHABLE(); 569 const uint16_t* end,
514 return false; 570 LiteralDesc* literal) {
571 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length));
515 } 572 }
516 573
517 574
518 template<> 575 template<>
519 bool Lexer<uint8_t>::FillLiteral( 576 template<>
520 const TokenDesc& token, LiteralDesc* literal) { 577 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start,
578 const uint8_t* end,
579 LiteralDesc* literal) {
580 literal->SetOneByteString(
581 Vector<const uint8_t>(start, literal->length), false);
582 }
583
584
585 template<>
586 template<>
587 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start,
588 const int8_t* end,
589 LiteralDesc* literal) {
590 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start);
591 literal->SetOneByteString(
592 Vector<const uint8_t>(cast, literal->length), false);
593 }
594
595
596 template<class Char>
597 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) {
521 literal->beg_pos = token.beg_pos; 598 literal->beg_pos = token.beg_pos;
522 const uint8_t* start = buffer_ + token.beg_pos; 599 const Char* start = buffer_ + token.beg_pos;
523 const uint8_t* end = buffer_ + token.end_pos; 600 const Char* end = buffer_ + token.end_pos;
524 if (token.token == Token::STRING) { 601 if (token.token == Token::STRING) {
525 ++start; 602 ++start;
526 --end; 603 --end;
527 } 604 }
528 if (IsSubstringOfSource(token)) { 605 if (!token.has_escapes) {
529 literal->is_one_byte = true; 606 bool is_one_byte = IsOneByte(start, end);
530 literal->is_in_buffer = false; 607 if (sizeof(Char) == 2 || is_one_byte) {
531 literal->offset = start - buffer_; 608 literal->offset = start - buffer_;
532 literal->length = end - start; 609 literal->length = end - start;
533 literal->one_byte_string = Vector<const uint8_t>(start, literal->length); 610 if (sizeof(Char) == 1) {
534 return true; 611 SetLiteral<true>(start, end, literal);
612 } else if (is_one_byte) {
613 SetLiteral<true>(start, end, literal);
614 } else {
615 SetLiteral<false>(start, end, literal);
616 }
617 return true;
618 }
535 } 619 }
536 return CopyToLiteralBuffer(start, end, token, literal); 620 return CopyToLiteralBuffer(start, end, token, literal);
537 } 621 }
538 622
539 623
540 template<>
541 bool Lexer<uint16_t>::FillLiteral(
542 const TokenDesc& token, LiteralDesc* literal) {
543 literal->beg_pos = token.beg_pos;
544 const uint16_t* start = buffer_ + token.beg_pos;
545 const uint16_t* end = buffer_ + token.end_pos;
546 if (token.token == Token::STRING) {
547 ++start;
548 --end;
549 }
550 if (IsSubstringOfSource(token)) {
551 literal->is_one_byte = false;
552 literal->is_in_buffer = false;
553 literal->offset = start - buffer_;
554 literal->length = end - start;
555 literal->two_byte_string = Vector<const uint16_t>(start, literal->length);
556 return true;
557 }
558 return CopyToLiteralBuffer(start, end, token, literal);
559 }
560
561
562 template<>
563 bool Lexer<int8_t>::FillLiteral(
564 const TokenDesc& token, LiteralDesc* literal) {
565 // FIXME: implement.
566 UNREACHABLE();
567 return false;
568 }
569
570
571 template<class Char> 624 template<class Char>
572 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start, 625 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,
573 const Char* end, 626 const Char* end,
574 const TokenDesc& token, 627 const TokenDesc& token,
575 LiteralDesc* literal) { 628 LiteralDesc* literal) {
576 literal->buffer.Reset(); 629 literal->buffer.Reset();
577 if (token.has_escapes) { 630 if (token.has_escapes) {
578 for (const Char* cursor = start; cursor != end;) { 631 for (const Char* cursor = start; cursor != end;) {
579 if (*cursor != '\\') { 632 if (*cursor != '\\') {
580 literal->buffer.AddChar(*cursor++); 633 literal->buffer.AddChar(*cursor++);
581 } else if (token.token == Token::IDENTIFIER) { 634 } else if (token.token == Token::IDENTIFIER) {
582 uc32 c; 635 uc32 c;
583 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); 636 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);
584 ASSERT(cursor != NULL); 637 ASSERT(cursor != NULL);
585 if (cursor == NULL) return false; 638 if (cursor == NULL) return false;
586 literal->buffer.AddChar(c); 639 literal->buffer.AddChar(c);
587 } else { 640 } else {
588 cursor = ScanEscape(cursor, end, &literal->buffer); 641 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);
589 ASSERT(cursor != NULL); 642 ASSERT(cursor != NULL);
590 if (cursor == NULL) return false; 643 if (cursor == NULL) return false;
591 } 644 }
592 } 645 }
593 } else { 646 } else {
647 // TODO(dcarney): This can only happen for utf8 strings
648 // use a helper function.
594 for (const Char* cursor = start; cursor != end;) { 649 for (const Char* cursor = start; cursor != end;) {
595 literal->buffer.AddChar(*cursor++); 650 literal->buffer.AddChar(*cursor++);
596 } 651 }
597 } 652 }
598 literal->is_one_byte = literal->buffer.is_ascii(); 653 literal->SetStringFromLiteralBuffer();
599 literal->is_in_buffer = true;
600 literal->length = literal->buffer.length();
601 if (literal->is_one_byte) {
602 literal->one_byte_string =
603 Vector<const uint8_t>::cast(literal->buffer.ascii_literal());
604 } else {
605 literal->two_byte_string = literal->buffer.utf16_literal();
606 }
607 return true; 654 return true;
608 } 655 }
609 656
610 657
611 template<class Char> 658 template<class Char>
612 Handle<String> Lexer<Char>::InternalizeLiteral( 659 Handle<String> Lexer<Char>::InternalizeLiteral(
613 LiteralDesc* literal) { 660 LiteralDesc* literal) {
614 Factory* factory = isolate_->factory(); 661 // Factory* factory = isolate_->factory();
615 if (literal->is_in_buffer) { 662 // if (literal->is_in_buffer) {
616 return literal->is_one_byte 663 // return literal->is_one_byte
617 ? factory->InternalizeOneByteString( 664 // ? factory->InternalizeOneByteString(
618 Vector<const uint8_t>::cast(literal->one_byte_string)) 665 // Vector<const uint8_t>::cast(literal->one_byte_string))
619 : factory->InternalizeTwoByteString(literal->two_byte_string); 666 // : factory->InternalizeTwoByteString(literal->two_byte_string);
620 } 667 // }
621 if (sizeof(Char) == 1) { 668 // if (sizeof(Char) == 1) {
622 SubStringKey<uint8_t> key( 669 // SubStringKey<uint8_t> key(
623 source_handle_, literal->offset, literal->length); 670 // source_handle_, literal->offset, literal->length);
624 return factory->InternalizeStringWithKey(&key); 671 // return factory->InternalizeStringWithKey(&key);
625 } else { 672 // } else {
626 SubStringKey<uint16_t> key( 673 // SubStringKey<uint16_t> key(
627 source_handle_, literal->offset, literal->length); 674 // source_handle_, literal->offset, literal->length);
628 return factory->InternalizeStringWithKey(&key); 675 // return factory->InternalizeStringWithKey(&key);
629 } 676 // }
677 CHECK(false);
678 return Handle<String>();
630 } 679 }
631 680
632 681
633 template<> 682 template<>
634 Handle<String> Lexer<uint8_t>::AllocateLiteral( 683 Handle<String> Lexer<uint8_t>::AllocateLiteral(
635 LiteralDesc* literal, PretenureFlag pretenured) { 684 LiteralDesc* literal, PretenureFlag pretenured) {
636 Factory* factory = isolate_->factory(); 685 // Factory* factory = isolate_->factory();
637 if (literal->is_in_buffer) { 686 // if (literal->is_in_buffer) {
638 return literal->is_one_byte 687 // return literal->is_one_byte
639 ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) 688 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
640 : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured); 689 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)
641 } 690 // }
642 int from = literal->offset; 691 // int from = literal->offset;
643 int length = literal->length; 692 // int length = literal->length;
644 // Save the offset and the length before allocating the string as it may 693 // // Save the offset and the length before allocating the string as it may
645 // cause a GC, invalidate the literal, and move the source. 694 // // cause a GC, invalidate the literal, and move the source.
646 Handle<String> result = factory->NewRawOneByteString(length, pretenured); 695 // Handle<String> result = factory->NewRawOneByteString(length, pretenured);
647 uint8_t* chars = SeqOneByteString::cast(*result)->GetChars(); 696 // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();
648 String::WriteToFlat(*source_handle_, chars, from, from + length); 697 // String::WriteToFlat(*source_handle_, chars, from, from + length);
649 return result; 698 // return result;
699 CHECK(false);
700 return Handle<String>();
650 } 701 }
651 702
652 703
653 template<> 704 template<>
654 Handle<String> Lexer<uint16_t>::AllocateLiteral( 705 Handle<String> Lexer<uint16_t>::AllocateLiteral(
655 LiteralDesc* literal, PretenureFlag pretenured) { 706 LiteralDesc* literal, PretenureFlag pretenured) {
656 Factory* factory = isolate_->factory(); 707 // Factory* factory = isolate_->factory();
657 if (literal->is_in_buffer) { 708 // if (literal->is_in_buffer) {
658 return literal->is_one_byte 709 // return literal->is_one_byte
659 ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) 710 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
660 : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured); 711 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)
661 } 712 // }
662 // Save the offset and the length before allocating the string as it may 713 // // Save the offset and the length before allocating the string as it may
663 // cause a GC, invalidate the literal, and move the source. 714 // // cause a GC, invalidate the literal, and move the source.
664 int from = literal->offset; 715 // int from = literal->offset;
665 int length = literal->length; 716 // int length = literal->length;
666 Handle<String> result = factory->NewRawTwoByteString(length, pretenured); 717 // Handle<String> result = factory->NewRawTwoByteString(length, pretenured);
667 uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars(); 718 // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();
668 String::WriteToFlat(*source_handle_, chars, from, from + length); 719 // String::WriteToFlat(*source_handle_, chars, from, from + length);
669 return result; 720 // return result;
721 CHECK(false);
722 return Handle<String>();
670 } 723 }
671 724
672 725
673 template<> 726 template<>
674 Handle<String> Lexer<int8_t>::AllocateLiteral( 727 Handle<String> Lexer<int8_t>::AllocateLiteral(
675 LiteralDesc* literal, PretenureFlag pretenured) { 728 LiteralDesc* literal, PretenureFlag pretenured) {
676 // FIXME: implement 729 CHECK(false);
677 UNREACHABLE();
678 return Handle<String>(); 730 return Handle<String>();
679 } 731 }
680 732
733
681 template class Lexer<uint8_t>; 734 template class Lexer<uint8_t>;
682 template class Lexer<uint16_t>; 735 template class Lexer<uint16_t>;
683 template class Lexer<int8_t>; 736 template class Lexer<int8_t>;
684 737
685 } } // v8::internal 738 } } // v8::internal
OLD | NEW
« no previous file with comments | « src/lexer/lexer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698