src/lexer/lexer.cc - Issue 187603004: Experimental parser: make utf8 sort of work

Side by Side Diff: src/lexer/lexer.cc

Issue 187603004: Experimental parser: make utf8 sort of work (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
132 void LexerGCHandler::UpdateLexersAfterGC() {	132 void LexerGCHandler::UpdateLexersAfterGC() {

133 typedef std::set<LexerBase*>::const_iterator It;	133 typedef std::set<LexerBase*>::const_iterator It;

134 for (It it = lexers_.begin(); it != lexers_.end(); ++it) {	134 for (It it = lexers_.begin(); it != lexers_.end(); ++it) {

135 (*it)->UpdateBufferBasedOnHandle();	135 (*it)->UpdateBufferBasedOnHandle();

136 }	136 }

137 }	137 }

138	138

139	139

140 LexerBase::LexerBase(UnicodeCache* unicode_cache)	140 LexerBase::LexerBase(UnicodeCache* unicode_cache)

141 : unicode_cache_(unicode_cache),	141 : unicode_cache_(unicode_cache),

	142 current_literal_(&literals_[0]),

	143 next_literal_(&literals_[1]),

142 has_line_terminator_before_next_(true),	144 has_line_terminator_before_next_(true),

143 has_multiline_comment_before_next_(false),	145 has_multiline_comment_before_next_(false),

144 current_literal_(&literals_[0]),

145 next_literal_(&literals_[1]),

146 harmony_numeric_literals_(false),	146 harmony_numeric_literals_(false),

147 harmony_modules_(false),	147 harmony_modules_(false),

148 harmony_scoping_(false) {	148 harmony_scoping_(false) {

149 }	149 }

150	150

151	151

152 LexerBase::~LexerBase() {}	152 LexerBase::~LexerBase() {}

153	153

154	154

155 // Returns the next token and advances input.	155 // Returns the next token and advances input.

156 Token::Value LexerBase::Next() {	156 Token::Value LexerBase::Next() {

157 has_line_terminator_before_next_ = false;	157 has_line_terminator_before_next_ = false;

158 has_multiline_comment_before_next_ = false;	158 has_multiline_comment_before_next_ = false;

159 current_ = next_;	159 current_ = next_;

160 std::swap(current_literal_, next_literal_);	160 std::swap(current_literal_, next_literal_);

161 Scan();	161 Scan();

162 return current_.token;	162 return current_.token;

163 }	163 }

164	164

165	165

166 template<typename Char>	166 template<typename Char>

167 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,	167 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,

168 const Char* source_ptr,	168 const Char* source_ptr,

169 int length)	169 int length)

170 : LexerBase(unicode_cache),	170 : LexerBase(unicode_cache),

171 isolate_(NULL),	171 isolate_(NULL),

172 source_ptr_(source_ptr),	172 source_ptr_(source_ptr),

173 start_position_(0),

174 end_position_(length),	173 end_position_(length),

175 buffer_(NULL),	174 buffer_(source_ptr),

176 buffer_end_(NULL),	175 buffer_end_(source_ptr + length),

177 start_(NULL),	176 start_(source_ptr),

178 cursor_(NULL),	177 cursor_(source_ptr),

179 last_octal_end_(NULL) {	178 last_octal_end_(NULL) {

180 CHECK(false); // not yet supported	179 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;

181 }	180 }

182	181

183	182

184 template<typename Char>	183 template<typename Char>

185 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,	184 Lexer<Char>::Lexer(UnicodeCache* unicode_cache,

186 Handle<String> source,	185 Handle<String> source,

187 int start_position,	186 int start_position,

188 int end_position)	187 int end_position)

189 : LexerBase(unicode_cache),	188 : LexerBase(unicode_cache),

190 isolate_(source->GetIsolate()),	189 isolate_(source->GetIsolate()),

191 source_handle_(FlattenGetString(source)),	190 source_handle_(FlattenGetString(source)),

192 source_ptr_(NULL),	191 source_ptr_(NULL),

193 start_position_(start_position),

194 end_position_(end_position),	192 end_position_(end_position),

195 buffer_(NULL),	193 buffer_(NULL),

196 buffer_end_(NULL),	194 buffer_end_(NULL),

197 start_(NULL),	195 start_(NULL),

198 cursor_(NULL),	196 cursor_(NULL),

199 last_octal_end_(NULL) {	197 last_octal_end_(NULL) {

	198 cursor_ += start_position;

200 UpdateBufferBasedOnHandle();	199 UpdateBufferBasedOnHandle();

	200 isolate_->lexer_gc_handler()->AddLexer(this);

201 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;	201 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;

202 isolate_->lexer_gc_handler()->AddLexer(this);

203 // TODO(dcarney): move this to UpdateBufferBasedOnHandle

204 cursor_ = buffer_ + start_position;

205 buffer_end_ = buffer_ + end_position;

206 start_ = cursor_;

207 }	202 }

208	203

209	204

210 template<typename Char>	205 template<typename Char>

211 Lexer<Char>::~Lexer() {	206 Lexer<Char>::~Lexer() {

212 if (!source_handle_.is_null()) {	207 if (!source_handle_.is_null()) {

213 isolate_->lexer_gc_handler()->RemoveLexer(this);	208 isolate_->lexer_gc_handler()->RemoveLexer(this);

214 }	209 }

215 }	210 }

216	211

217	212

	213 // TODO(dcarney): utf8 handling

218 template<typename Char>	214 template<typename Char>

219 void Lexer<Char>::SeekForward(int pos) {	215 void Lexer<Char>::SeekForward(int pos) {

	216 // TODO(dcarney): utf8 handling

220 cursor_ = buffer_ + pos;	217 cursor_ = buffer_ + pos;

221 start_ = cursor_;	218 start_ = cursor_;

222 has_line_terminator_before_next_ = false;	219 has_line_terminator_before_next_ = false;

223 has_multiline_comment_before_next_ = false;	220 has_multiline_comment_before_next_ = false;

224 Scan(); // Fills in next_.	221 Scan();

225 }	222 }

226	223

227	224

	225 // TODO(dcarney): utf8 handling

228 template<typename Char>	226 template<typename Char>

229 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) {	227 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) {

230 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	228 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

231 bool in_character_class = false;	229 bool in_character_class = false;

232	230

233 // Previous token is either '/' or '/=', in the second case, the	231 // Previous token is either '/' or '/=', in the second case, the

234 // pattern starts at =.	232 // pattern starts at =.

235 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0);	233 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0);

236	234

237 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	235 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

(...skipping 24 matching lines...) Expand all Loading...
262 if (*cursor_ == ']') in_character_class = false;	260 if (*cursor_ == ']') in_character_class = false;

263 if (++cursor_ >= buffer_end_) return false;	261 if (++cursor_ >= buffer_end_) return false;

264 }	262 }

265 }	263 }

266 next_.end_pos = (cursor_ - buffer_);	264 next_.end_pos = (cursor_ - buffer_);

267 ++cursor_; // consume '/'	265 ++cursor_; // consume '/'

268 return true;	266 return true;

269 }	267 }

270	268

271	269

	270 // TODO(dcarney): utf8 handling

272 template<typename Char>	271 template<typename Char>

273 bool Lexer<Char>::ScanRegExpFlags() {	272 bool Lexer<Char>::ScanRegExpFlags() {

274 next_.beg_pos = cursor_ - buffer_;	273 next_.beg_pos = cursor_ - buffer_;

275 // Scan regular expression flags.	274 // Scan regular expression flags.

276 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) {	275 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) {

277 if (*cursor_ != '\\') {	276 if (*cursor_ != '\\') {

278 if (++cursor_ >= buffer_end_) break;	277 if (++cursor_ >= buffer_end_) break;

279 } else {	278 } else {

280 if (!ScanLiteralUnicodeEscape()) break;	279 if (!ScanLiteralUnicodeEscape()) break;

281 if (++cursor_ >= buffer_end_) break;	280 if (++cursor_ >= buffer_end_) break;

(...skipping 13 matching lines...) Expand all Loading...
295 if (d < 0) {	294 if (d < 0) {

296 return -1;	295 return -1;

297 }	296 }

298 x = x * 16 + d;	297 x = x * 16 + d;

299 }	298 }

300 return x;	299 return x;

301 }	300 }

302	301

303	302

304 template<typename Char>	303 template<typename Char>

305 const Char* Lexer<Char>::ScanHexNumber(	304 static const Char* ScanHexNumber(

306 const Char* cursor, const Char* end, uc32* result) {	305 const Char* cursor, const Char* end, uc32* result) {

307 uc32 x = 0;	306 uc32 x = 0;

308 for ( ; cursor < end; ++cursor) {	307 for ( ; cursor < end; ++cursor) {

309 int d = HexValue(*cursor);	308 int d = HexValue(*cursor);

310 if (d < 0) {	309 if (d < 0) {

311 *result = -1;	310 *result = -1;

312 return NULL;	311 return NULL;

313 }	312 }

314 x = x * 16 + d;	313 x = x * 16 + d;

315 }	314 }

316 *result = x;	315 *result = x;

317 return cursor;	316 return cursor;

318 }	317 }

319	318

320	319

321 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of	320 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

322 // ECMA-262. Other JS VMs support them.	321 // ECMA-262. Other JS VMs support them.

323 template<typename Char>	322 template<typename Char>

324 const Char* Lexer<Char>::ScanOctalEscape(	323 static const Char* ScanOctalEscape(

325 const Char* start, const Char* end, uc32* result) {	324 const Char* start, const Char* end, uc32* result) {

326 uc32 x = *result - '0';	325 uc32 x = *result - '0';

327 const Char* cursor;	326 const Char* cursor;

328 for (cursor = start; cursor < end; cursor++) {	327 for (cursor = start; cursor < end; cursor++) {

329 int d = *cursor - '0';	328 int d = *cursor - '0';

330 if (d < 0 || d > 7) break;	329 if (d < 0 || d > 7) break;

331 int nx = x * 8 + d;	330 int nx = x * 8 + d;

332 if (nx >= 256) break;	331 if (nx >= 256) break;

333 x = nx;	332 x = nx;

334 }	333 }

335 *result = x;	334 *result = x;

336 return cursor;	335 return cursor;

337 }	336 }

338	337

339	338

	339 // TODO(dcarney): utf8 handling

340 template<typename Char>	340 template<typename Char>

341 bool Lexer<Char>::ScanLiteralUnicodeEscape() {	341 bool Lexer<Char>::ScanLiteralUnicodeEscape() {

342 ASSERT(cursor_ < buffer_end_);	342 ASSERT(cursor_ < buffer_end_);

343 Char primary_char = *(cursor_);	343 Char primary_char = *(cursor_);

344 ASSERT(primary_char == '\\');	344 ASSERT(primary_char == '\\');

345 if (++cursor_ >= buffer_end_) return false;	345 if (++cursor_ >= buffer_end_) return false;

346 primary_char = *(cursor_);	346 primary_char = *(cursor_);

347 int i = 1;	347 int i = 1;

348 if (primary_char == 'u') {	348 if (primary_char == 'u') {

349 i++;	349 i++;

350 while (i < 6) {	350 while (i < 6) {

351 if (++cursor_ >= buffer_end_) return false;	351 if (++cursor_ >= buffer_end_) return false;

352 primary_char = *(cursor_);	352 primary_char = *(cursor_);

353 if (!IsHexDigit(primary_char)) break;	353 if (!IsHexDigit(primary_char)) break;

354 i++;	354 i++;

355 }	355 }

356 }	356 }

357 return i == 6;	357 return i == 6;

358 }	358 }

359	359

360	360

361 template<typename Char>	361 template<typename Char>

362 const Char* Lexer<Char>::ScanIdentifierUnicodeEscape(	362 static const Char* ScanIdentifierUnicodeEscape(

363 const Char* cursor, const Char* end, uc32* result) {	363 const Char* cursor, const Char* end, uc32* result) {

364 ASSERT(*cursor == '\\');	364 ASSERT(*cursor == '\\');

365 if (++cursor >= end) return NULL;	365 if (++cursor >= end) return NULL;

366 if (*cursor != 'u') return NULL;	366 if (*cursor != 'u') return NULL;

367 ++cursor;	367 ++cursor;

368 if (cursor + 4 > end) return NULL;	368 if (cursor + 4 > end) return NULL;

369 cursor = ScanHexNumber(cursor, cursor + 4, result);	369 cursor = ScanHexNumber(cursor, cursor + 4, result);

370 return cursor;	370 return cursor;

371 }	371 }

372	372

373	373

374 template<typename Char>	374 template<typename Char>

375 const Char* Lexer<Char>::ScanEscape(	375 static const Char* ScanEscape(UnicodeCache* cache,

376 const Char* cursor, const Char* end, LiteralBuffer* literal) {	376 const Char* cursor,

	377 const Char* end,

	378 LiteralBuffer* literal) {

377 ASSERT(*cursor == '\\');	379 ASSERT(*cursor == '\\');

378 if (++cursor >= end) return NULL;	380 if (++cursor >= end) return NULL;

379 uc32 c = *cursor;	381 uc32 c = *cursor;

380 if (++cursor > end) return NULL;	382 if (++cursor > end) return NULL;

381 // Skip escaped newlines.	383 // Skip escaped newlines.

382 if (unicode_cache_->IsLineTerminator(c)) {	384 if (cache->IsLineTerminator(c)) {

383 uc32 peek = *cursor;	385 uc32 peek = *cursor;

384 // Allow CR+LF newlines in multiline string literals.	386 // Allow CR+LF newlines in multiline string literals.

385 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;	387 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;

386 // Allow LF+CR newlines in multiline string literals.	388 // Allow LF+CR newlines in multiline string literals.

387 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++;	389 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++;

388 return cursor;	390 return cursor;

389 }	391 }

390	392

391 switch (c) {	393 switch (c) {

392 case '\'': // fall through	394 case '\'': // fall through

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
425 // According to ECMA-262, section 7.8.4, characters not covered by the	427 // According to ECMA-262, section 7.8.4, characters not covered by the

426 // above cases should be illegal, but they are commonly handled as	428 // above cases should be illegal, but they are commonly handled as

427 // non-escaped characters by JS VMs.	429 // non-escaped characters by JS VMs.

428 literal->AddChar(c);	430 literal->AddChar(c);

429 return cursor;	431 return cursor;

430 }	432 }

431	433

432	434

433 template<typename Char>	435 template<typename Char>

434 LexerBase::Location Lexer<Char>::octal_position() const {	436 LexerBase::Location Lexer<Char>::octal_position() const {

435 if (!last_octal_end_)	437 if (!last_octal_end_) return Location::invalid();

436 return Location::invalid();

437 // The last octal might be an octal escape or an octal number. Whichever it	438 // The last octal might be an octal escape or an octal number. Whichever it

438 // is, we'll find the start by just scanning back until we hit a non-octal	439 // is, we'll find the start by just scanning back until we hit a non-octal

439 // character.	440 // character.

440 const Char* temp_cursor = last_octal_end_ - 1;	441 const Char* temp_cursor = last_octal_end_ - 1;

441 while (temp_cursor >= buffer_ && temp_cursor >= '0' && temp_cursor <= '7')	442 while (temp_cursor >= buffer_ && temp_cursor >= '0' && temp_cursor <= '7') {

442 --temp_cursor;	443 --temp_cursor;

	444 }

443 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);	445 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);

444 }	446 }

445	447

446	448

447 template<>	449 template<>

448 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const {	450 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const {

449 String::FlatContent content = source_handle_->GetFlatContent();	451 String::FlatContent content = source_handle_->GetFlatContent();

450 return content.ToOneByteVector().start();	452 return content.ToOneByteVector().start();

451 }	453 }

452	454

(...skipping 17 matching lines...) Expand all Loading...
470 void Lexer<Char>::UpdateBufferBasedOnHandle() {	472 void Lexer<Char>::UpdateBufferBasedOnHandle() {

471 // We get a raw pointer from the Handle, but we also update it every time	473 // We get a raw pointer from the Handle, but we also update it every time

472 // there is a GC, so it is safe.	474 // there is a GC, so it is safe.

473 DisallowHeapAllocation no_gc;	475 DisallowHeapAllocation no_gc;

474 const Char* new_buffer = GetNewBufferBasedOnHandle();	476 const Char* new_buffer = GetNewBufferBasedOnHandle();

475 if (new_buffer != buffer_) {	477 if (new_buffer != buffer_) {

476 int start_offset = start_ - buffer_;	478 int start_offset = start_ - buffer_;

477 int cursor_offset = cursor_ - buffer_;	479 int cursor_offset = cursor_ - buffer_;

478 int last_octal_end_offset = last_octal_end_ - buffer_;	480 int last_octal_end_offset = last_octal_end_ - buffer_;

479 buffer_ = new_buffer;	481 buffer_ = new_buffer;

480 buffer_end_ = buffer_ + source_handle_->length();	482 buffer_end_ = buffer_ + end_position_;

481 start_ = buffer_ + start_offset;	483 start_ = buffer_ + start_offset;

482 cursor_ = buffer_ + cursor_offset;	484 cursor_ = buffer_ + cursor_offset;

483 if (last_octal_end_ != NULL) {	485 if (last_octal_end_ != NULL) {

484 last_octal_end_ = buffer_ + last_octal_end_offset;	486 last_octal_end_ = buffer_ + last_octal_end_offset;

485 }	487 }

486 ResetLiterals();	488 current_literal_->Invalidate();

	489 next_literal_->Invalidate();

487 }	490 }

488 }	491 }

489	492

490	493

491 template<>	494 void LexerBase::LiteralDesc::SetOneByteString(

492 bool Lexer<uint8_t>::IsSubstringOfSource(const TokenDesc& token) {	495 Vector<const uint8_t> string, bool owned) {

493 return !token.has_escapes;	496 is_in_buffer_ = false;

	497 if (is_one_byte_string_owned_) {

	498 one_byte_string_.Dispose();

	499 }

	500 is_one_byte_string_owned_ = owned;

	501 is_one_byte_ = true;

	502 one_byte_string_ = string;

	503 }

	504

	505

	506 void LexerBase::LiteralDesc::SetTwoByteString(Vector<const uint16_t> string) {

	507 is_in_buffer_ = false;

	508 is_one_byte_ = false;

	509 two_byte_string_ = string;

	510 }

	511

	512

	513 void LexerBase::LiteralDesc::SetStringFromLiteralBuffer() {

	514 is_one_byte_ = buffer.is_ascii();

	515 is_in_buffer_ = true;

	516 length = buffer.length();

	517 if (is_one_byte_) {

	518 if (is_one_byte_string_owned_) {

	519 one_byte_string_.Dispose();

	520 }

	521 is_one_byte_string_owned_ = false;

	522 one_byte_string_ = Vector<const uint8_t>::cast(buffer.ascii_literal());

	523 } else {

	524 two_byte_string_ = buffer.utf16_literal();

	525 }

	526 }

	527

	528

	529 static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) {

	530 return true;

	531 }

	532

	533

	534 static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) {

	535 uint16_t acc = 0;

	536 while (cursor != end) {

	537 acc |= *cursor++ >> 8;

	538 }

	539 return acc == 0;

	540 }

	541

	542

	543 static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) {

	544 int8_t acc = 0;

	545 while (cursor != end) {

	546 acc |= *cursor++ >> 7;

	547 }

	548 return acc == 0;

494 }	549 }

495	550

496	551

497 template<>	552 template<>

498 bool Lexer<uint16_t>::IsSubstringOfSource(	553 template<>

499 const TokenDesc& token) {	554 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor,

500 if (token.has_escapes) return false;	555 const uint16_t* end,

501 const uint16_t* start = buffer_ + token.beg_pos;	556 LiteralDesc* literal) {

502 const uint16_t* end = buffer_ + token.end_pos;	557 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length);

503 for (const uint16_t* cursor = start; cursor != end; ++cursor) {	558 uint8_t* data = vector.start();

504 if (*cursor >= unibrow::Latin1::kMaxChar) return true;	559 while (cursor < end) {

	560 data++ = cursor++;

505 }	561 }

506 return false;	562 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true);

507 }	563 }

508	564

509	565

510 template<>	566 template<>

511 bool Lexer<int8_t>::IsSubstringOfSource(const TokenDesc& token) {	567 template<>

512 // FIXME: implement.	568 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start,

513 UNREACHABLE();	569 const uint16_t* end,

514 return false;	570 LiteralDesc* literal) {

	571 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length));

515 }	572 }

516	573

517	574

518 template<>	575 template<>

519 bool Lexer<uint8_t>::FillLiteral(	576 template<>

520 const TokenDesc& token, LiteralDesc* literal) {	577 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start,

	578 const uint8_t* end,

	579 LiteralDesc* literal) {

	580 literal->SetOneByteString(

	581 Vector<const uint8_t>(start, literal->length), false);

	582 }

	583

	584

	585 template<>

	586 template<>

	587 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start,

	588 const int8_t* end,

	589 LiteralDesc* literal) {

	590 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start);

	591 literal->SetOneByteString(

	592 Vector<const uint8_t>(cast, literal->length), false);

	593 }

	594

	595

	596 template<class Char>

	597 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) {

521 literal->beg_pos = token.beg_pos;	598 literal->beg_pos = token.beg_pos;

522 const uint8_t* start = buffer_ + token.beg_pos;	599 const Char* start = buffer_ + token.beg_pos;

523 const uint8_t* end = buffer_ + token.end_pos;	600 const Char* end = buffer_ + token.end_pos;

524 if (token.token == Token::STRING) {	601 if (token.token == Token::STRING) {

525 ++start;	602 ++start;

526 --end;	603 --end;

527 }	604 }

528 if (IsSubstringOfSource(token)) {	605 if (!token.has_escapes) {

529 literal->is_one_byte = true;	606 bool is_one_byte = IsOneByte(start, end);

530 literal->is_in_buffer = false;	607 if (sizeof(Char) == 2 || is_one_byte) {

531 literal->offset = start - buffer_;	608 literal->offset = start - buffer_;

532 literal->length = end - start;	609 literal->length = end - start;

533 literal->one_byte_string = Vector<const uint8_t>(start, literal->length);	610 if (sizeof(Char) == 1) {

534 return true;	611 SetLiteral<true>(start, end, literal);

	612 } else if (is_one_byte) {

	613 SetLiteral<true>(start, end, literal);

	614 } else {

	615 SetLiteral<false>(start, end, literal);

	616 }

	617 return true;

	618 }

535 }	619 }

536 return CopyToLiteralBuffer(start, end, token, literal);	620 return CopyToLiteralBuffer(start, end, token, literal);

537 }	621 }

538	622

539	623

540 template<>

541 bool Lexer<uint16_t>::FillLiteral(

542 const TokenDesc& token, LiteralDesc* literal) {

543 literal->beg_pos = token.beg_pos;

544 const uint16_t* start = buffer_ + token.beg_pos;

545 const uint16_t* end = buffer_ + token.end_pos;

546 if (token.token == Token::STRING) {

547 ++start;

548 --end;

549 }

550 if (IsSubstringOfSource(token)) {

551 literal->is_one_byte = false;

552 literal->is_in_buffer = false;

553 literal->offset = start - buffer_;

554 literal->length = end - start;

555 literal->two_byte_string = Vector<const uint16_t>(start, literal->length);

556 return true;

557 }

558 return CopyToLiteralBuffer(start, end, token, literal);

559 }

560

561

562 template<>

563 bool Lexer<int8_t>::FillLiteral(

564 const TokenDesc& token, LiteralDesc* literal) {

565 // FIXME: implement.

566 UNREACHABLE();

567 return false;

568 }

569

570

571 template<class Char>	624 template<class Char>

572 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,	625 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,

573 const Char* end,	626 const Char* end,

574 const TokenDesc& token,	627 const TokenDesc& token,

575 LiteralDesc* literal) {	628 LiteralDesc* literal) {

576 literal->buffer.Reset();	629 literal->buffer.Reset();

577 if (token.has_escapes) {	630 if (token.has_escapes) {

578 for (const Char* cursor = start; cursor != end;) {	631 for (const Char* cursor = start; cursor != end;) {

579 if (*cursor != '\\') {	632 if (*cursor != '\\') {

580 literal->buffer.AddChar(*cursor++);	633 literal->buffer.AddChar(*cursor++);

581 } else if (token.token == Token::IDENTIFIER) {	634 } else if (token.token == Token::IDENTIFIER) {

582 uc32 c;	635 uc32 c;

583 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);	636 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c);

584 ASSERT(cursor != NULL);	637 ASSERT(cursor != NULL);

585 if (cursor == NULL) return false;	638 if (cursor == NULL) return false;

586 literal->buffer.AddChar(c);	639 literal->buffer.AddChar(c);

587 } else {	640 } else {

588 cursor = ScanEscape(cursor, end, &literal->buffer);	641 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);

589 ASSERT(cursor != NULL);	642 ASSERT(cursor != NULL);

590 if (cursor == NULL) return false;	643 if (cursor == NULL) return false;

591 }	644 }

592 }	645 }

593 } else {	646 } else {

	647 // TODO(dcarney): This can only happen for utf8 strings

	648 // use a helper function.

594 for (const Char* cursor = start; cursor != end;) {	649 for (const Char* cursor = start; cursor != end;) {

595 literal->buffer.AddChar(*cursor++);	650 literal->buffer.AddChar(*cursor++);

596 }	651 }

597 }	652 }

598 literal->is_one_byte = literal->buffer.is_ascii();	653 literal->SetStringFromLiteralBuffer();

599 literal->is_in_buffer = true;

600 literal->length = literal->buffer.length();

601 if (literal->is_one_byte) {

602 literal->one_byte_string =

603 Vector<const uint8_t>::cast(literal->buffer.ascii_literal());

604 } else {

605 literal->two_byte_string = literal->buffer.utf16_literal();

606 }

607 return true;	654 return true;

608 }	655 }

609	656

610	657

611 template<class Char>	658 template<class Char>

612 Handle<String> Lexer<Char>::InternalizeLiteral(	659 Handle<String> Lexer<Char>::InternalizeLiteral(

613 LiteralDesc* literal) {	660 LiteralDesc* literal) {

614 Factory* factory = isolate_->factory();	661 // Factory* factory = isolate_->factory();

615 if (literal->is_in_buffer) {	662 // if (literal->is_in_buffer) {

616 return literal->is_one_byte	663 // return literal->is_one_byte

617 ? factory->InternalizeOneByteString(	664 // ? factory->InternalizeOneByteString(

618 Vector<const uint8_t>::cast(literal->one_byte_string))	665 // Vector<const uint8_t>::cast(literal->one_byte_string))

619 : factory->InternalizeTwoByteString(literal->two_byte_string);	666 // : factory->InternalizeTwoByteString(literal->two_byte_string);

620 }	667 // }

621 if (sizeof(Char) == 1) {	668 // if (sizeof(Char) == 1) {

622 SubStringKey<uint8_t> key(	669 // SubStringKey<uint8_t> key(

623 source_handle_, literal->offset, literal->length);	670 // source_handle_, literal->offset, literal->length);

624 return factory->InternalizeStringWithKey(&key);	671 // return factory->InternalizeStringWithKey(&key);

625 } else {	672 // } else {

626 SubStringKey<uint16_t> key(	673 // SubStringKey<uint16_t> key(

627 source_handle_, literal->offset, literal->length);	674 // source_handle_, literal->offset, literal->length);

628 return factory->InternalizeStringWithKey(&key);	675 // return factory->InternalizeStringWithKey(&key);

629 }	676 // }

	677 CHECK(false);

	678 return Handle<String>();

630 }	679 }

631	680

632	681

633 template<>	682 template<>

634 Handle<String> Lexer<uint8_t>::AllocateLiteral(	683 Handle<String> Lexer<uint8_t>::AllocateLiteral(

635 LiteralDesc* literal, PretenureFlag pretenured) {	684 LiteralDesc* literal, PretenureFlag pretenured) {

636 Factory* factory = isolate_->factory();	685 // Factory* factory = isolate_->factory();

637 if (literal->is_in_buffer) {	686 // if (literal->is_in_buffer) {

638 return literal->is_one_byte	687 // return literal->is_one_byte

639 ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)	688 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)

640 : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured);	689 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)

641 }	690 // }

642 int from = literal->offset;	691 // int from = literal->offset;

643 int length = literal->length;	692 // int length = literal->length;

644 // Save the offset and the length before allocating the string as it may	693 // // Save the offset and the length before allocating the string as it may

645 // cause a GC, invalidate the literal, and move the source.	694 // // cause a GC, invalidate the literal, and move the source.

646 Handle<String> result = factory->NewRawOneByteString(length, pretenured);	695 // Handle<String> result = factory->NewRawOneByteString(length, pretenured);

647 uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();	696 // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();

648 String::WriteToFlat(*source_handle_, chars, from, from + length);	697 // String::WriteToFlat(*source_handle_, chars, from, from + length);

649 return result;	698 // return result;

	699 CHECK(false);

	700 return Handle<String>();

650 }	701 }

651	702

652	703

653 template<>	704 template<>

654 Handle<String> Lexer<uint16_t>::AllocateLiteral(	705 Handle<String> Lexer<uint16_t>::AllocateLiteral(

655 LiteralDesc* literal, PretenureFlag pretenured) {	706 LiteralDesc* literal, PretenureFlag pretenured) {

656 Factory* factory = isolate_->factory();	707 // Factory* factory = isolate_->factory();

657 if (literal->is_in_buffer) {	708 // if (literal->is_in_buffer) {

658 return literal->is_one_byte	709 // return literal->is_one_byte

659 ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)	710 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)

660 : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured);	711 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)

661 }	712 // }

662 // Save the offset and the length before allocating the string as it may	713 // // Save the offset and the length before allocating the string as it may

663 // cause a GC, invalidate the literal, and move the source.	714 // // cause a GC, invalidate the literal, and move the source.

664 int from = literal->offset;	715 // int from = literal->offset;

665 int length = literal->length;	716 // int length = literal->length;

666 Handle<String> result = factory->NewRawTwoByteString(length, pretenured);	717 // Handle<String> result = factory->NewRawTwoByteString(length, pretenured);

667 uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();	718 // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();

668 String::WriteToFlat(*source_handle_, chars, from, from + length);	719 // String::WriteToFlat(*source_handle_, chars, from, from + length);

669 return result;	720 // return result;

	721 CHECK(false);

	722 return Handle<String>();

670 }	723 }

671	724

672	725

673 template<>	726 template<>

674 Handle<String> Lexer<int8_t>::AllocateLiteral(	727 Handle<String> Lexer<int8_t>::AllocateLiteral(

675 LiteralDesc* literal, PretenureFlag pretenured) {	728 LiteralDesc* literal, PretenureFlag pretenured) {

676 // FIXME: implement	729 CHECK(false);

677 UNREACHABLE();

678 return Handle<String>();	730 return Handle<String>();

679 }	731 }

680	732

	733

681 template class Lexer<uint8_t>;	734 template class Lexer<uint8_t>;

682 template class Lexer<uint16_t>;	735 template class Lexer<uint16_t>;

683 template class Lexer<int8_t>;	736 template class Lexer<int8_t>;

684	737

685 } } // v8::internal	738 } } // v8::internal

OLD	NEW

« no previous file with comments | « src/lexer/lexer.h ('k') | no next file » | no next file with comments »