| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 121 matching lines...) |
| 132 void LexerGCHandler::UpdateLexersAfterGC() { | 132 void LexerGCHandler::UpdateLexersAfterGC() { |
| 133 typedef std::set<LexerBase*>::const_iterator It; | 133 typedef std::set<LexerBase*>::const_iterator It; |
| 134 for (It it = lexers_.begin(); it != lexers_.end(); ++it) { | 134 for (It it = lexers_.begin(); it != lexers_.end(); ++it) { |
| 135 (*it)->UpdateBufferBasedOnHandle(); | 135 (*it)->UpdateBufferBasedOnHandle(); |
| 136 } | 136 } |
| 137 } | 137 } |
| 138 | 138 |
| 139 | 139 |
| 140 LexerBase::LexerBase(UnicodeCache* unicode_cache) | 140 LexerBase::LexerBase(UnicodeCache* unicode_cache) |
| 141 : unicode_cache_(unicode_cache), | 141 : unicode_cache_(unicode_cache), |
| 142 current_literal_(&literals_[0]), |
| 143 next_literal_(&literals_[1]), |
| 142 has_line_terminator_before_next_(true), | 144 has_line_terminator_before_next_(true), |
| 143 has_multiline_comment_before_next_(false), | 145 has_multiline_comment_before_next_(false), |
| 144 current_literal_(&literals_[0]), | |
| 145 next_literal_(&literals_[1]), | |
| 146 harmony_numeric_literals_(false), | 146 harmony_numeric_literals_(false), |
| 147 harmony_modules_(false), | 147 harmony_modules_(false), |
| 148 harmony_scoping_(false) { | 148 harmony_scoping_(false) { |
| 149 } | 149 } |
| 150 | 150 |
| 151 | 151 |
| 152 LexerBase::~LexerBase() {} | 152 LexerBase::~LexerBase() {} |
| 153 | 153 |
| 154 | 154 |
| 155 // Returns the next token and advances input. | 155 // Returns the next token and advances input. |
| 156 Token::Value LexerBase::Next() { | 156 Token::Value LexerBase::Next() { |
| 157 has_line_terminator_before_next_ = false; | 157 has_line_terminator_before_next_ = false; |
| 158 has_multiline_comment_before_next_ = false; | 158 has_multiline_comment_before_next_ = false; |
| 159 current_ = next_; | 159 current_ = next_; |
| 160 std::swap(current_literal_, next_literal_); | 160 std::swap(current_literal_, next_literal_); |
| 161 Scan(); | 161 Scan(); |
| 162 return current_.token; | 162 return current_.token; |
| 163 } | 163 } |
| 164 | 164 |
| 165 | 165 |
| 166 template<typename Char> | 166 template<typename Char> |
| 167 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, | 167 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, |
| 168 const Char* source_ptr, | 168 const Char* source_ptr, |
| 169 int length) | 169 int length) |
| 170 : LexerBase(unicode_cache), | 170 : LexerBase(unicode_cache), |
| 171 isolate_(NULL), | 171 isolate_(NULL), |
| 172 source_ptr_(source_ptr), | 172 source_ptr_(source_ptr), |
| 173 start_position_(0), | |
| 174 end_position_(length), | 173 end_position_(length), |
| 175 buffer_(NULL), | 174 buffer_(source_ptr), |
| 176 buffer_end_(NULL), | 175 buffer_end_(source_ptr + length), |
| 177 start_(NULL), | 176 start_(source_ptr), |
| 178 cursor_(NULL), | 177 cursor_(source_ptr), |
| 179 last_octal_end_(NULL) { | 178 last_octal_end_(NULL) { |
| 180 CHECK(false); // not yet supported | 179 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0; |
| 181 } | 180 } |
| 182 | 181 |
| 183 | 182 |
| 184 template<typename Char> | 183 template<typename Char> |
| 185 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, | 184 Lexer<Char>::Lexer(UnicodeCache* unicode_cache, |
| 186 Handle<String> source, | 185 Handle<String> source, |
| 187 int start_position, | 186 int start_position, |
| 188 int end_position) | 187 int end_position) |
| 189 : LexerBase(unicode_cache), | 188 : LexerBase(unicode_cache), |
| 190 isolate_(source->GetIsolate()), | 189 isolate_(source->GetIsolate()), |
| 191 source_handle_(FlattenGetString(source)), | 190 source_handle_(FlattenGetString(source)), |
| 192 source_ptr_(NULL), | 191 source_ptr_(NULL), |
| 193 start_position_(start_position), | |
| 194 end_position_(end_position), | 192 end_position_(end_position), |
| 195 buffer_(NULL), | 193 buffer_(NULL), |
| 196 buffer_end_(NULL), | 194 buffer_end_(NULL), |
| 197 start_(NULL), | 195 start_(NULL), |
| 198 cursor_(NULL), | 196 cursor_(NULL), |
| 199 last_octal_end_(NULL) { | 197 last_octal_end_(NULL) { |
| 198 cursor_ += start_position; |
| 200 UpdateBufferBasedOnHandle(); | 199 UpdateBufferBasedOnHandle(); |
| 200 isolate_->lexer_gc_handler()->AddLexer(this); |
| 201 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0; | 201 current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0; |
| 202 isolate_->lexer_gc_handler()->AddLexer(this); | |
| 203 // TODO(dcarney): move this to UpdateBufferBasedOnHandle | |
| 204 cursor_ = buffer_ + start_position; | |
| 205 buffer_end_ = buffer_ + end_position; | |
| 206 start_ = cursor_; | |
| 207 } | 202 } |
| 208 | 203 |
| 209 | 204 |
| 210 template<typename Char> | 205 template<typename Char> |
| 211 Lexer<Char>::~Lexer() { | 206 Lexer<Char>::~Lexer() { |
| 212 if (!source_handle_.is_null()) { | 207 if (!source_handle_.is_null()) { |
| 213 isolate_->lexer_gc_handler()->RemoveLexer(this); | 208 isolate_->lexer_gc_handler()->RemoveLexer(this); |
| 214 } | 209 } |
| 215 } | 210 } |
| 216 | 211 |
| 217 | 212 |
| 213 // TODO(dcarney): utf8 handling |
| 218 template<typename Char> | 214 template<typename Char> |
| 219 void Lexer<Char>::SeekForward(int pos) { | 215 void Lexer<Char>::SeekForward(int pos) { |
| 216 // TODO(dcarney): utf8 handling |
| 220 cursor_ = buffer_ + pos; | 217 cursor_ = buffer_ + pos; |
| 221 start_ = cursor_; | 218 start_ = cursor_; |
| 222 has_line_terminator_before_next_ = false; | 219 has_line_terminator_before_next_ = false; |
| 223 has_multiline_comment_before_next_ = false; | 220 has_multiline_comment_before_next_ = false; |
| 224 Scan(); // Fills in next_. | 221 Scan(); |
| 225 } | 222 } |
| 226 | 223 |
| 227 | 224 |
| 225 // TODO(dcarney): utf8 handling |
| 228 template<typename Char> | 226 template<typename Char> |
| 229 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) { | 227 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) { |
| 230 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 228 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 231 bool in_character_class = false; | 229 bool in_character_class = false; |
| 232 | 230 |
| 233 // Previous token is either '/' or '/=', in the second case, the | 231 // Previous token is either '/' or '/=', in the second case, the |
| 234 // pattern starts at =. | 232 // pattern starts at =. |
| 235 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); | 233 next_.beg_pos = next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); |
| 236 | 234 |
| 237 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 235 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| (...skipping 24 matching lines...) |
| 262 if (*cursor_ == ']') in_character_class = false; | 260 if (*cursor_ == ']') in_character_class = false; |
| 263 if (++cursor_ >= buffer_end_) return false; | 261 if (++cursor_ >= buffer_end_) return false; |
| 264 } | 262 } |
| 265 } | 263 } |
| 266 next_.end_pos = (cursor_ - buffer_); | 264 next_.end_pos = (cursor_ - buffer_); |
| 267 ++cursor_; // consume '/' | 265 ++cursor_; // consume '/' |
| 268 return true; | 266 return true; |
| 269 } | 267 } |
| 270 | 268 |
| 271 | 269 |
| 270 // TODO(dcarney): utf8 handling |
| 272 template<typename Char> | 271 template<typename Char> |
| 273 bool Lexer<Char>::ScanRegExpFlags() { | 272 bool Lexer<Char>::ScanRegExpFlags() { |
| 274 next_.beg_pos = cursor_ - buffer_; | 273 next_.beg_pos = cursor_ - buffer_; |
| 275 // Scan regular expression flags. | 274 // Scan regular expression flags. |
| 276 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) { | 275 while (cursor_ < buffer_end_ && unicode_cache_->IsIdentifierPart(*cursor_)) { |
| 277 if (*cursor_ != '\\') { | 276 if (*cursor_ != '\\') { |
| 278 if (++cursor_ >= buffer_end_) break; | 277 if (++cursor_ >= buffer_end_) break; |
| 279 } else { | 278 } else { |
| 280 if (!ScanLiteralUnicodeEscape()) break; | 279 if (!ScanLiteralUnicodeEscape()) break; |
| 281 if (++cursor_ >= buffer_end_) break; | 280 if (++cursor_ >= buffer_end_) break; |
| (...skipping 13 matching lines...) |
| 295 if (d < 0) { | 294 if (d < 0) { |
| 296 return -1; | 295 return -1; |
| 297 } | 296 } |
| 298 x = x * 16 + d; | 297 x = x * 16 + d; |
| 299 } | 298 } |
| 300 return x; | 299 return x; |
| 301 } | 300 } |
| 302 | 301 |
| 303 | 302 |
| 304 template<typename Char> | 303 template<typename Char> |
| 305 const Char* Lexer<Char>::ScanHexNumber( | 304 static const Char* ScanHexNumber( |
| 306 const Char* cursor, const Char* end, uc32* result) { | 305 const Char* cursor, const Char* end, uc32* result) { |
| 307 uc32 x = 0; | 306 uc32 x = 0; |
| 308 for ( ; cursor < end; ++cursor) { | 307 for ( ; cursor < end; ++cursor) { |
| 309 int d = HexValue(*cursor); | 308 int d = HexValue(*cursor); |
| 310 if (d < 0) { | 309 if (d < 0) { |
| 311 *result = -1; | 310 *result = -1; |
| 312 return NULL; | 311 return NULL; |
| 313 } | 312 } |
| 314 x = x * 16 + d; | 313 x = x * 16 + d; |
| 315 } | 314 } |
| 316 *result = x; | 315 *result = x; |
| 317 return cursor; | 316 return cursor; |
| 318 } | 317 } |
| 319 | 318 |
| 320 | 319 |
| 321 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 320 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 322 // ECMA-262. Other JS VMs support them. | 321 // ECMA-262. Other JS VMs support them. |
| 323 template<typename Char> | 322 template<typename Char> |
| 324 const Char* Lexer<Char>::ScanOctalEscape( | 323 static const Char* ScanOctalEscape( |
| 325 const Char* start, const Char* end, uc32* result) { | 324 const Char* start, const Char* end, uc32* result) { |
| 326 uc32 x = *result - '0'; | 325 uc32 x = *result - '0'; |
| 327 const Char* cursor; | 326 const Char* cursor; |
| 328 for (cursor = start; cursor < end; cursor++) { | 327 for (cursor = start; cursor < end; cursor++) { |
| 329 int d = *cursor - '0'; | 328 int d = *cursor - '0'; |
| 330 if (d < 0 || d > 7) break; | 329 if (d < 0 || d > 7) break; |
| 331 int nx = x * 8 + d; | 330 int nx = x * 8 + d; |
| 332 if (nx >= 256) break; | 331 if (nx >= 256) break; |
| 333 x = nx; | 332 x = nx; |
| 334 } | 333 } |
| 335 *result = x; | 334 *result = x; |
| 336 return cursor; | 335 return cursor; |
| 337 } | 336 } |
| 338 | 337 |
| 339 | 338 |
| 339 // TODO(dcarney): utf8 handling |
| 340 template<typename Char> | 340 template<typename Char> |
| 341 bool Lexer<Char>::ScanLiteralUnicodeEscape() { | 341 bool Lexer<Char>::ScanLiteralUnicodeEscape() { |
| 342 ASSERT(cursor_ < buffer_end_); | 342 ASSERT(cursor_ < buffer_end_); |
| 343 Char primary_char = *(cursor_); | 343 Char primary_char = *(cursor_); |
| 344 ASSERT(primary_char == '\\'); | 344 ASSERT(primary_char == '\\'); |
| 345 if (++cursor_ >= buffer_end_) return false; | 345 if (++cursor_ >= buffer_end_) return false; |
| 346 primary_char = *(cursor_); | 346 primary_char = *(cursor_); |
| 347 int i = 1; | 347 int i = 1; |
| 348 if (primary_char == 'u') { | 348 if (primary_char == 'u') { |
| 349 i++; | 349 i++; |
| 350 while (i < 6) { | 350 while (i < 6) { |
| 351 if (++cursor_ >= buffer_end_) return false; | 351 if (++cursor_ >= buffer_end_) return false; |
| 352 primary_char = *(cursor_); | 352 primary_char = *(cursor_); |
| 353 if (!IsHexDigit(primary_char)) break; | 353 if (!IsHexDigit(primary_char)) break; |
| 354 i++; | 354 i++; |
| 355 } | 355 } |
| 356 } | 356 } |
| 357 return i == 6; | 357 return i == 6; |
| 358 } | 358 } |
| 359 | 359 |
| 360 | 360 |
| 361 template<typename Char> | 361 template<typename Char> |
| 362 const Char* Lexer<Char>::ScanIdentifierUnicodeEscape( | 362 static const Char* ScanIdentifierUnicodeEscape( |
| 363 const Char* cursor, const Char* end, uc32* result) { | 363 const Char* cursor, const Char* end, uc32* result) { |
| 364 ASSERT(*cursor == '\\'); | 364 ASSERT(*cursor == '\\'); |
| 365 if (++cursor >= end) return NULL; | 365 if (++cursor >= end) return NULL; |
| 366 if (*cursor != 'u') return NULL; | 366 if (*cursor != 'u') return NULL; |
| 367 ++cursor; | 367 ++cursor; |
| 368 if (cursor + 4 > end) return NULL; | 368 if (cursor + 4 > end) return NULL; |
| 369 cursor = ScanHexNumber(cursor, cursor + 4, result); | 369 cursor = ScanHexNumber(cursor, cursor + 4, result); |
| 370 return cursor; | 370 return cursor; |
| 371 } | 371 } |
| 372 | 372 |
| 373 | 373 |
| 374 template<typename Char> | 374 template<typename Char> |
| 375 const Char* Lexer<Char>::ScanEscape( | 375 static const Char* ScanEscape(UnicodeCache* cache, |
| 376 const Char* cursor, const Char* end, LiteralBuffer* literal) { | 376 const Char* cursor, |
| 377 const Char* end, |
| 378 LiteralBuffer* literal) { |
| 377 ASSERT(*cursor == '\\'); | 379 ASSERT(*cursor == '\\'); |
| 378 if (++cursor >= end) return NULL; | 380 if (++cursor >= end) return NULL; |
| 379 uc32 c = *cursor; | 381 uc32 c = *cursor; |
| 380 if (++cursor > end) return NULL; | 382 if (++cursor > end) return NULL; |
| 381 // Skip escaped newlines. | 383 // Skip escaped newlines. |
| 382 if (unicode_cache_->IsLineTerminator(c)) { | 384 if (cache->IsLineTerminator(c)) { |
| 383 uc32 peek = *cursor; | 385 uc32 peek = *cursor; |
| 384 // Allow CR+LF newlines in multiline string literals. | 386 // Allow CR+LF newlines in multiline string literals. |
| 385 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++; | 387 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++; |
| 386 // Allow LF+CR newlines in multiline string literals. | 388 // Allow LF+CR newlines in multiline string literals. |
| 387 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++; | 389 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++; |
| 388 return cursor; | 390 return cursor; |
| 389 } | 391 } |
| 390 | 392 |
| 391 switch (c) { | 393 switch (c) { |
| 392 case '\'': // fall through | 394 case '\'': // fall through |
| (...skipping 32 matching lines...) |
| 425 // According to ECMA-262, section 7.8.4, characters not covered by the | 427 // According to ECMA-262, section 7.8.4, characters not covered by the |
| 426 // above cases should be illegal, but they are commonly handled as | 428 // above cases should be illegal, but they are commonly handled as |
| 427 // non-escaped characters by JS VMs. | 429 // non-escaped characters by JS VMs. |
| 428 literal->AddChar(c); | 430 literal->AddChar(c); |
| 429 return cursor; | 431 return cursor; |
| 430 } | 432 } |
| 431 | 433 |
| 432 | 434 |
| 433 template<typename Char> | 435 template<typename Char> |
| 434 LexerBase::Location Lexer<Char>::octal_position() const { | 436 LexerBase::Location Lexer<Char>::octal_position() const { |
| 435 if (!last_octal_end_) | 437 if (!last_octal_end_) return Location::invalid(); |
| 436 return Location::invalid(); | |
| 437 // The last octal might be an octal escape or an octal number. Whichever it | 438 // The last octal might be an octal escape or an octal number. Whichever it |
| 438 // is, we'll find the start by just scanning back until we hit a non-octal | 439 // is, we'll find the start by just scanning back until we hit a non-octal |
| 439 // character. | 440 // character. |
| 440 const Char* temp_cursor = last_octal_end_ - 1; | 441 const Char* temp_cursor = last_octal_end_ - 1; |
| 441 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') | 442 while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') { |
| 442 --temp_cursor; | 443 --temp_cursor; |
| 444 } |
| 443 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_); | 445 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_); |
| 444 } | 446 } |
| 445 | 447 |
| 446 | 448 |
| 447 template<> | 449 template<> |
| 448 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const { | 450 const uint8_t* Lexer<uint8_t>::GetNewBufferBasedOnHandle() const { |
| 449 String::FlatContent content = source_handle_->GetFlatContent(); | 451 String::FlatContent content = source_handle_->GetFlatContent(); |
| 450 return content.ToOneByteVector().start(); | 452 return content.ToOneByteVector().start(); |
| 451 } | 453 } |
| 452 | 454 |
| (...skipping 17 matching lines...) |
| 470 void Lexer<Char>::UpdateBufferBasedOnHandle() { | 472 void Lexer<Char>::UpdateBufferBasedOnHandle() { |
| 471 // We get a raw pointer from the Handle, but we also update it every time | 473 // We get a raw pointer from the Handle, but we also update it every time |
| 472 // there is a GC, so it is safe. | 474 // there is a GC, so it is safe. |
| 473 DisallowHeapAllocation no_gc; | 475 DisallowHeapAllocation no_gc; |
| 474 const Char* new_buffer = GetNewBufferBasedOnHandle(); | 476 const Char* new_buffer = GetNewBufferBasedOnHandle(); |
| 475 if (new_buffer != buffer_) { | 477 if (new_buffer != buffer_) { |
| 476 int start_offset = start_ - buffer_; | 478 int start_offset = start_ - buffer_; |
| 477 int cursor_offset = cursor_ - buffer_; | 479 int cursor_offset = cursor_ - buffer_; |
| 478 int last_octal_end_offset = last_octal_end_ - buffer_; | 480 int last_octal_end_offset = last_octal_end_ - buffer_; |
| 479 buffer_ = new_buffer; | 481 buffer_ = new_buffer; |
| 480 buffer_end_ = buffer_ + source_handle_->length(); | 482 buffer_end_ = buffer_ + end_position_; |
| 481 start_ = buffer_ + start_offset; | 483 start_ = buffer_ + start_offset; |
| 482 cursor_ = buffer_ + cursor_offset; | 484 cursor_ = buffer_ + cursor_offset; |
| 483 if (last_octal_end_ != NULL) { | 485 if (last_octal_end_ != NULL) { |
| 484 last_octal_end_ = buffer_ + last_octal_end_offset; | 486 last_octal_end_ = buffer_ + last_octal_end_offset; |
| 485 } | 487 } |
| 486 ResetLiterals(); | 488 current_literal_->Invalidate(); |
| 489 next_literal_->Invalidate(); |
| 487 } | 490 } |
| 488 } | 491 } |
| 489 | 492 |
| 490 | 493 |
| 491 template<> | 494 void LexerBase::LiteralDesc::SetOneByteString( |
| 492 bool Lexer<uint8_t>::IsSubstringOfSource(const TokenDesc& token) { | 495 Vector<const uint8_t> string, bool owned) { |
| 493 return !token.has_escapes; | 496 is_in_buffer_ = false; |
| 497 if (is_one_byte_string_owned_) { |
| 498 one_byte_string_.Dispose(); |
| 499 } |
| 500 is_one_byte_string_owned_ = owned; |
| 501 is_one_byte_ = true; |
| 502 one_byte_string_ = string; |
| 503 } |
| 504 |
| 505 |
| 506 void LexerBase::LiteralDesc::SetTwoByteString(Vector<const uint16_t> string) { |
| 507 is_in_buffer_ = false; |
| 508 is_one_byte_ = false; |
| 509 two_byte_string_ = string; |
| 510 } |
| 511 |
| 512 |
| 513 void LexerBase::LiteralDesc::SetStringFromLiteralBuffer() { |
| 514 is_one_byte_ = buffer.is_ascii(); |
| 515 is_in_buffer_ = true; |
| 516 length = buffer.length(); |
| 517 if (is_one_byte_) { |
| 518 if (is_one_byte_string_owned_) { |
| 519 one_byte_string_.Dispose(); |
| 520 } |
| 521 is_one_byte_string_owned_ = false; |
| 522 one_byte_string_ = Vector<const uint8_t>::cast(buffer.ascii_literal()); |
| 523 } else { |
| 524 two_byte_string_ = buffer.utf16_literal(); |
| 525 } |
| 526 } |
| 527 |
| 528 |
| 529 static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) { |
| 530 return true; |
| 531 } |
| 532 |
| 533 |
| 534 static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) { |
| 535 uint16_t acc = 0; |
| 536 while (cursor != end) { |
| 537 acc |= *cursor++ >> 8; |
| 538 } |
| 539 return acc == 0; |
| 540 } |
| 541 |
| 542 |
| 543 static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) { |
| 544 int8_t acc = 0; |
| 545 while (cursor != end) { |
| 546 acc |= *cursor++ >> 7; |
| 547 } |
| 548 return acc == 0; |
| 494 } | 549 } |
| 495 | 550 |
| 496 | 551 |
| 497 template<> | 552 template<> |
| 498 bool Lexer<uint16_t>::IsSubstringOfSource( | 553 template<> |
| 499 const TokenDesc& token) { | 554 inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor, |
| 500 if (token.has_escapes) return false; | 555 const uint16_t* end, |
| 501 const uint16_t* start = buffer_ + token.beg_pos; | 556 LiteralDesc* literal) { |
| 502 const uint16_t* end = buffer_ + token.end_pos; | 557 Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length); |
| 503 for (const uint16_t* cursor = start; cursor != end; ++cursor) { | 558 uint8_t* data = vector.start(); |
| 504 if (*cursor >= unibrow::Latin1::kMaxChar) return true; | 559 while (cursor < end) { |
| 560 *data++ = *cursor++; |
| 505 } | 561 } |
| 506 return false; | 562 literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true); |
| 507 } | 563 } |
| 508 | 564 |
| 509 | 565 |
| 510 template<> | 566 template<> |
| 511 bool Lexer<int8_t>::IsSubstringOfSource(const TokenDesc& token) { | 567 template<> |
| 512 // FIXME: implement. | 568 inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start, |
| 513 UNREACHABLE(); | 569 const uint16_t* end, |
| 514 return false; | 570 LiteralDesc* literal) { |
| 571 literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length)); |
| 515 } | 572 } |
| 516 | 573 |
| 517 | 574 |
| 518 template<> | 575 template<> |
| 519 bool Lexer<uint8_t>::FillLiteral( | 576 template<> |
| 520 const TokenDesc& token, LiteralDesc* literal) { | 577 inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start, |
| 578 const uint8_t* end, |
| 579 LiteralDesc* literal) { |
| 580 literal->SetOneByteString( |
| 581 Vector<const uint8_t>(start, literal->length), false); |
| 582 } |
| 583 |
| 584 |
| 585 template<> |
| 586 template<> |
| 587 inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start, |
| 588 const int8_t* end, |
| 589 LiteralDesc* literal) { |
| 590 const uint8_t* cast = reinterpret_cast<const uint8_t*>(start); |
| 591 literal->SetOneByteString( |
| 592 Vector<const uint8_t>(cast, literal->length), false); |
| 593 } |
| 594 |
| 595 |
| 596 template<class Char> |
| 597 bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) { |
| 521 literal->beg_pos = token.beg_pos; | 598 literal->beg_pos = token.beg_pos; |
| 522 const uint8_t* start = buffer_ + token.beg_pos; | 599 const Char* start = buffer_ + token.beg_pos; |
| 523 const uint8_t* end = buffer_ + token.end_pos; | 600 const Char* end = buffer_ + token.end_pos; |
| 524 if (token.token == Token::STRING) { | 601 if (token.token == Token::STRING) { |
| 525 ++start; | 602 ++start; |
| 526 --end; | 603 --end; |
| 527 } | 604 } |
| 528 if (IsSubstringOfSource(token)) { | 605 if (!token.has_escapes) { |
| 529 literal->is_one_byte = true; | 606 bool is_one_byte = IsOneByte(start, end); |
| 530 literal->is_in_buffer = false; | 607 if (sizeof(Char) == 2 || is_one_byte) { |
| 531 literal->offset = start - buffer_; | 608 literal->offset = start - buffer_; |
| 532 literal->length = end - start; | 609 literal->length = end - start; |
| 533 literal->one_byte_string = Vector<const uint8_t>(start, literal->length); | 610 if (sizeof(Char) == 1) { |
| 534 return true; | 611 SetLiteral<true>(start, end, literal); |
| 612 } else if (is_one_byte) { |
| 613 SetLiteral<true>(start, end, literal); |
| 614 } else { |
| 615 SetLiteral<false>(start, end, literal); |
| 616 } |
| 617 return true; |
| 618 } |
| 535 } | 619 } |
| 536 return CopyToLiteralBuffer(start, end, token, literal); | 620 return CopyToLiteralBuffer(start, end, token, literal); |
| 537 } | 621 } |
| 538 | 622 |
| 539 | 623 |
| 540 template<> | |
| 541 bool Lexer<uint16_t>::FillLiteral( | |
| 542 const TokenDesc& token, LiteralDesc* literal) { | |
| 543 literal->beg_pos = token.beg_pos; | |
| 544 const uint16_t* start = buffer_ + token.beg_pos; | |
| 545 const uint16_t* end = buffer_ + token.end_pos; | |
| 546 if (token.token == Token::STRING) { | |
| 547 ++start; | |
| 548 --end; | |
| 549 } | |
| 550 if (IsSubstringOfSource(token)) { | |
| 551 literal->is_one_byte = false; | |
| 552 literal->is_in_buffer = false; | |
| 553 literal->offset = start - buffer_; | |
| 554 literal->length = end - start; | |
| 555 literal->two_byte_string = Vector<const uint16_t>(start, literal->length); | |
| 556 return true; | |
| 557 } | |
| 558 return CopyToLiteralBuffer(start, end, token, literal); | |
| 559 } | |
| 560 | |
| 561 | |
| 562 template<> | |
| 563 bool Lexer<int8_t>::FillLiteral( | |
| 564 const TokenDesc& token, LiteralDesc* literal) { | |
| 565 // FIXME: implement. | |
| 566 UNREACHABLE(); | |
| 567 return false; | |
| 568 } | |
| 569 | |
| 570 | |
| 571 template<class Char> | 624 template<class Char> |
| 572 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start, | 625 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start, |
| 573 const Char* end, | 626 const Char* end, |
| 574 const TokenDesc& token, | 627 const TokenDesc& token, |
| 575 LiteralDesc* literal) { | 628 LiteralDesc* literal) { |
| 576 literal->buffer.Reset(); | 629 literal->buffer.Reset(); |
| 577 if (token.has_escapes) { | 630 if (token.has_escapes) { |
| 578 for (const Char* cursor = start; cursor != end;) { | 631 for (const Char* cursor = start; cursor != end;) { |
| 579 if (*cursor != '\\') { | 632 if (*cursor != '\\') { |
| 580 literal->buffer.AddChar(*cursor++); | 633 literal->buffer.AddChar(*cursor++); |
| 581 } else if (token.token == Token::IDENTIFIER) { | 634 } else if (token.token == Token::IDENTIFIER) { |
| 582 uc32 c; | 635 uc32 c; |
| 583 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); | 636 cursor = ScanIdentifierUnicodeEscape(cursor, end, &c); |
| 584 ASSERT(cursor != NULL); | 637 ASSERT(cursor != NULL); |
| 585 if (cursor == NULL) return false; | 638 if (cursor == NULL) return false; |
| 586 literal->buffer.AddChar(c); | 639 literal->buffer.AddChar(c); |
| 587 } else { | 640 } else { |
| 588 cursor = ScanEscape(cursor, end, &literal->buffer); | 641 cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer); |
| 589 ASSERT(cursor != NULL); | 642 ASSERT(cursor != NULL); |
| 590 if (cursor == NULL) return false; | 643 if (cursor == NULL) return false; |
| 591 } | 644 } |
| 592 } | 645 } |
| 593 } else { | 646 } else { |
| 647 // TODO(dcarney): This can only happen for utf8 strings |
| 648 // use a helper function. |
| 594 for (const Char* cursor = start; cursor != end;) { | 649 for (const Char* cursor = start; cursor != end;) { |
| 595 literal->buffer.AddChar(*cursor++); | 650 literal->buffer.AddChar(*cursor++); |
| 596 } | 651 } |
| 597 } | 652 } |
| 598 literal->is_one_byte = literal->buffer.is_ascii(); | 653 literal->SetStringFromLiteralBuffer(); |
| 599 literal->is_in_buffer = true; | |
| 600 literal->length = literal->buffer.length(); | |
| 601 if (literal->is_one_byte) { | |
| 602 literal->one_byte_string = | |
| 603 Vector<const uint8_t>::cast(literal->buffer.ascii_literal()); | |
| 604 } else { | |
| 605 literal->two_byte_string = literal->buffer.utf16_literal(); | |
| 606 } | |
| 607 return true; | 654 return true; |
| 608 } | 655 } |
| 609 | 656 |
| 610 | 657 |
| 611 template<class Char> | 658 template<class Char> |
| 612 Handle<String> Lexer<Char>::InternalizeLiteral( | 659 Handle<String> Lexer<Char>::InternalizeLiteral( |
| 613 LiteralDesc* literal) { | 660 LiteralDesc* literal) { |
| 614 Factory* factory = isolate_->factory(); | 661 // Factory* factory = isolate_->factory(); |
| 615 if (literal->is_in_buffer) { | 662 // if (literal->is_in_buffer) { |
| 616 return literal->is_one_byte | 663 // return literal->is_one_byte |
| 617 ? factory->InternalizeOneByteString( | 664 // ? factory->InternalizeOneByteString( |
| 618 Vector<const uint8_t>::cast(literal->one_byte_string)) | 665 // Vector<const uint8_t>::cast(literal->one_byte_string)) |
| 619 : factory->InternalizeTwoByteString(literal->two_byte_string); | 666 // : factory->InternalizeTwoByteString(literal->two_byte_string); |
| 620 } | 667 // } |
| 621 if (sizeof(Char) == 1) { | 668 // if (sizeof(Char) == 1) { |
| 622 SubStringKey<uint8_t> key( | 669 // SubStringKey<uint8_t> key( |
| 623 source_handle_, literal->offset, literal->length); | 670 // source_handle_, literal->offset, literal->length); |
| 624 return factory->InternalizeStringWithKey(&key); | 671 // return factory->InternalizeStringWithKey(&key); |
| 625 } else { | 672 // } else { |
| 626 SubStringKey<uint16_t> key( | 673 // SubStringKey<uint16_t> key( |
| 627 source_handle_, literal->offset, literal->length); | 674 // source_handle_, literal->offset, literal->length); |
| 628 return factory->InternalizeStringWithKey(&key); | 675 // return factory->InternalizeStringWithKey(&key); |
| 629 } | 676 // } |
| 677 CHECK(false); |
| 678 return Handle<String>(); |
| 630 } | 679 } |
| 631 | 680 |
| 632 | 681 |
| 633 template<> | 682 template<> |
| 634 Handle<String> Lexer<uint8_t>::AllocateLiteral( | 683 Handle<String> Lexer<uint8_t>::AllocateLiteral( |
| 635 LiteralDesc* literal, PretenureFlag pretenured) { | 684 LiteralDesc* literal, PretenureFlag pretenured) { |
| 636 Factory* factory = isolate_->factory(); | 685 // Factory* factory = isolate_->factory(); |
| 637 if (literal->is_in_buffer) { | 686 // if (literal->is_in_buffer) { |
| 638 return literal->is_one_byte | 687 // return literal->is_one_byte |
| 639 ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) | 688 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) |
| 640 : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured); | 689 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured) |
| 641 } | 690 // } |
| 642 int from = literal->offset; | 691 // int from = literal->offset; |
| 643 int length = literal->length; | 692 // int length = literal->length; |
| 644 // Save the offset and the length before allocating the string as it may | 693 // // Save the offset and the length before allocating the string as it may |
| 645 // cause a GC, invalidate the literal, and move the source. | 694 // // cause a GC, invalidate the literal, and move the source. |
| 646 Handle<String> result = factory->NewRawOneByteString(length, pretenured); | 695 // Handle<String> result = factory->NewRawOneByteString(length, pretenured); |
| 647 uint8_t* chars = SeqOneByteString::cast(*result)->GetChars(); | 696 // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars(); |
| 648 String::WriteToFlat(*source_handle_, chars, from, from + length); | 697 // String::WriteToFlat(*source_handle_, chars, from, from + length); |
| 649 return result; | 698 // return result; |
| 699 CHECK(false); |
| 700 return Handle<String>(); |
| 650 } | 701 } |
| 651 | 702 |
| 652 | 703 |
| 653 template<> | 704 template<> |
| 654 Handle<String> Lexer<uint16_t>::AllocateLiteral( | 705 Handle<String> Lexer<uint16_t>::AllocateLiteral( |
| 655 LiteralDesc* literal, PretenureFlag pretenured) { | 706 LiteralDesc* literal, PretenureFlag pretenured) { |
| 656 Factory* factory = isolate_->factory(); | 707 // Factory* factory = isolate_->factory(); |
| 657 if (literal->is_in_buffer) { | 708 // if (literal->is_in_buffer) { |
| 658 return literal->is_one_byte | 709 // return literal->is_one_byte |
| 659 ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) | 710 // ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured) |
| 660 : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured); | 711 // : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured) |
| 661 } | 712 // } |
| 662 // Save the offset and the length before allocating the string as it may | 713 // // Save the offset and the length before allocating the string as it may |
| 663 // cause a GC, invalidate the literal, and move the source. | 714 // // cause a GC, invalidate the literal, and move the source. |
| 664 int from = literal->offset; | 715 // int from = literal->offset; |
| 665 int length = literal->length; | 716 // int length = literal->length; |
| 666 Handle<String> result = factory->NewRawTwoByteString(length, pretenured); | 717 // Handle<String> result = factory->NewRawTwoByteString(length, pretenured); |
| 667 uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars(); | 718 // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars(); |
| 668 String::WriteToFlat(*source_handle_, chars, from, from + length); | 719 // String::WriteToFlat(*source_handle_, chars, from, from + length); |
| 669 return result; | 720 // return result; |
| 721 CHECK(false); |
| 722 return Handle<String>(); |
| 670 } | 723 } |
| 671 | 724 |
| 672 | 725 |
| 673 template<> | 726 template<> |
| 674 Handle<String> Lexer<int8_t>::AllocateLiteral( | 727 Handle<String> Lexer<int8_t>::AllocateLiteral( |
| 675 LiteralDesc* literal, PretenureFlag pretenured) { | 728 LiteralDesc* literal, PretenureFlag pretenured) { |
| 676 // FIXME: implement | 729 CHECK(false); |
| 677 UNREACHABLE(); | |
| 678 return Handle<String>(); | 730 return Handle<String>(); |
| 679 } | 731 } |
| 680 | 732 |
| 733 |
| 681 template class Lexer<uint8_t>; | 734 template class Lexer<uint8_t>; |
| 682 template class Lexer<uint16_t>; | 735 template class Lexer<uint16_t>; |
| 683 template class Lexer<int8_t>; | 736 template class Lexer<int8_t>; |
| 684 | 737 |
| 685 } } // v8::internal | 738 } } // v8::internal |
| OLD | NEW |