src/scanner.cc - Issue 663683006: Implement ES6 Template Literals

Unified Diff: src/scanner.cc

Issue 663683006: Implement ES6 Template Literals (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Tiny fixups Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/scanner.cc

diff --git a/src/scanner.cc b/src/scanner.cc

index ddcd937584d50fc45fa921a25e2a5d21fcf5697e..1b489fdbffffa18f3242287935e924a19dcba58e 100644

--- a/src/scanner.cc

+++ b/src/scanner.cc

@@ -54,7 +54,7 @@ void Scanner::Initialize(Utf16CharacterStream* source) {

}

-uc32 Scanner::ScanHexNumber(int expected_length) {

+uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) {

DCHECK(expected_length <= 4); // prevent overflow

uc32 x = 0;

@@ -64,6 +64,9 @@ uc32 Scanner::ScanHexNumber(int expected_length) {

return -1;

}

x = x * 16 + d;

+ if (recordRaw) {

+ AddRawLiteralChar(c0_);

+ }

Advance();

}

@@ -403,7 +406,9 @@ Token::Value Scanner::ScanHtmlComment() {

void Scanner::Scan() {

next_.literal_chars = NULL;

+ next_.raw_literal_chars = NULL;

Token::Value token;

do {

// Remember the position of the next token

next_.location.beg_pos = source_pos();

@@ -626,6 +631,12 @@ void Scanner::Scan() {

token = Select(Token::BIT_NOT);

break;

+ case '`':

+ if (HarmonyTemplates()) {

+ token = ScanTemplateSpan();

+ break;

+ }

default:

if (c0_ < 0) {

token = Token::EOS;

@@ -671,8 +682,10 @@ void Scanner::SeekForward(int pos) {

}

-bool Scanner::ScanEscape() {

+bool Scanner::ScanEscape(bool recordRaw) {

uc32 c = c0_;

+ uc32 rc = c;

+ bool singleCharEscape = true;

Advance();

// Skip escaped newlines.

@@ -694,13 +707,17 @@ bool Scanner::ScanEscape() {

case 'r' : c = '\r'; break;

case 't' : c = '\t'; break;

case 'u' : {

- c = ScanHexNumber(4);

+ if (recordRaw) AddRawLiteralChar('u');

+ singleCharEscape = false;

+ c = ScanHexNumber(4, recordRaw);

if (c < 0) return false;

break;

}

case 'v' : c = '\v'; break;

case 'x' : {

- c = ScanHexNumber(2);

+ if (recordRaw) AddRawLiteralChar('x');

+ singleCharEscape = false;

+ c = ScanHexNumber(2, recordRaw);

if (c < 0) return false;

break;

}

@@ -711,12 +728,16 @@ bool Scanner::ScanEscape() {

case '4' : // fall through

case '5' : // fall through

case '6' : // fall through

- case '7' : c = ScanOctalEscape(c, 2); break;

+ case '7':

+ singleCharEscape = false;

+ c = ScanOctalEscape(c, 2, recordRaw);

+ break;

}

// According to ECMA-262, section 7.8.4, characters not covered by the

// above cases should be illegal, but they are commonly handled as

// non-escaped characters by JS VMs.

+ if (singleCharEscape && recordRaw) AddRawLiteralChar(rc);

AddLiteralChar(c);

return true;

}

@@ -724,7 +745,7 @@ bool Scanner::ScanEscape() {

// Octal escapes of the forms '\0xx' and '\xxx' are not a part of

// ECMA-262. Other JS VMs support them.

-uc32 Scanner::ScanOctalEscape(uc32 c, int length) {

+uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) {

uc32 x = c - '0';

int i = 0;

for (; i < length; i++) {

@@ -733,6 +754,9 @@ uc32 Scanner::ScanOctalEscape(uc32 c, int length) {

int nx = x * 8 + d;

if (nx >= 256) break;

x = nx;

+ if (recordRaw) {

+ AddRawLiteralChar(c0_);

+ }

Advance();

}

// Anything except '\0' is an octal escape sequence, illegal in strict mode.

@@ -770,6 +794,87 @@ Token::Value Scanner::ScanString() {

}

+Token::Value Scanner::ScanTemplateSpan() {

+ // When scanning a TemplateSpan, we are looking for the following construct:

+ // TEMPLATE_SPAN ::

+ // ` LiteralChars* ${

+ // | } LiteralChars* ${

+ //

+ // TEMPLATE_TAIL ::

+ // ` LiteralChars* `

+ // | } LiteralChars* `

+ //

+ // A TEMPLATE_SPAN should always be followed by an Expression, while a

+ // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

+ // followed by an Expression.

+ //

+ // raw_literal_chars_ represents TRV or the raw value of the template span,

+ // per the spec, while literal_chars_ represents TV or the cooked value of

+ // the template span.

+ //

+ // TODO(caitp): Do not store a separate literal buffer for the span TRV.

+ //

+ if (next_.token == Token::RBRACE) {

+ PushBack('}');

marja 2014/11/11 09:47:24 Hmm, when does this happen and why do we PushBack?

caitp (gmail) 2014/11/11 13:59:29 After parsing an expression, the scanner ends up w

marja 2014/11/11 15:01:56 But after this, we anyway do Advance() right away.

+ }

+ next_.location.beg_pos = source_pos();

+ Token::Value result = Token::ILLEGAL;

+ DCHECK(c0_ == '`' || c0_ == '}');

+ Advance(); // Consume ` or }

+ LiteralScope literal(this);

+ while (true) {

+ uc32 c = c0_;

+ Advance();

+ if (c == '`') {

+ result = Token::TEMPLATE_TAIL;

+ break;

+ } else if (c == '$' && c0_ == '{') {

+ Advance(); // Consume '{'

+ result = Token::TEMPLATE_SPAN;

+ break;

+ } else if (c == '\\') {

+ AddRawLiteralChar('\\');

+ if (unicode_cache_->IsLineTerminator(c0_)) {

+ // The TV of LineContinuation :: \ LineTerminatorSequence is the empty

+ // code unit sequence.

+ do {

+ uc32 lastChar = c0_;

+ Advance();

+ if (lastChar == '\r' && c0_ == '\n') Advance();

+ AddRawLiteralChar('\n');

+ } while (unicode_cache_->IsLineTerminator(c0_));

+ } else if (c0_ == '0') {

+ Advance();

+ AddRawLiteralChar('0');

+ AddLiteralChar('0');

+ } else {

+ ScanEscape(true);

+ }

+ } else if (c < 0) {

+ // Unterminated template literal

+ PushBack(c);

+ break;

+ } else {

+ // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.

+ // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence

+ // consisting of the CV 0x000A.

+ if (c == '\r') {

+ if (c0_ == '\n') Advance();

+ c = '\n';

+ }

+ AddLiteralChar(c);

+ AddRawLiteralChar(c);

+ }

+ literal.Complete();

+ next_.location.end_pos = source_pos();

+ next_.token = result;

+ return result;

void Scanner::ScanDecimalDigits() {

while (IsDecimalDigit(c0_))

AddLiteralCharAdvance();

@@ -1163,6 +1268,15 @@ const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) {

}

+const AstRawString* Scanner::CurrentRawSymbol(

+ AstValueFactory* ast_value_factory) {

+ if (is_raw_one_byte()) {

+ return ast_value_factory->GetOneByteString(raw_one_byte_string());

+ }

+ return ast_value_factory->GetTwoByteString(raw_two_byte_string());

const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

if (is_next_literal_one_byte()) {

return ast_value_factory->GetOneByteString(next_literal_one_byte_string());

@@ -1171,6 +1285,14 @@ const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) {

}

+const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) {

+ if (is_next_raw_one_byte()) {

+ return ast_value_factory->GetOneByteString(next_raw_one_byte_string());

+ }

+ return ast_value_factory->GetTwoByteString(next_raw_two_byte_string());

double Scanner::DoubleValue() {

DCHECK(is_literal_one_byte());

return StringToDouble(

« src/preparser.h ('K') | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »