src/scanner.cc - Issue 5188006: Push version 2.5.7 to trunk....

Unified Diff: src/scanner.cc

Issue 5188006: Push version 2.5.7 to trunk.... (Closed) Base URL: http://v8.googlecode.com/svn/trunk/

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/scanner.cc

===================================================================

--- src/scanner.cc (revision 5846)

+++ src/scanner.cc (working copy)

@@ -30,27 +30,15 @@

#include "ast.h"

#include "handles.h"

#include "scanner.h"

+#include "unicode-inl.h"

namespace v8 {

namespace internal {

// ----------------------------------------------------------------------------

-// Character predicates

-unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;

-unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;

-unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

-unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

-StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

-// ----------------------------------------------------------------------------

// UTF8Buffer

-UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }

+UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }

UTF8Buffer::~UTF8Buffer() {}

@@ -135,55 +123,6 @@

}

-// ExternalStringUTF16Buffer

-template <typename StringType, typename CharType>

-ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()

- : raw_data_(NULL) { }

-template <typename StringType, typename CharType>

-void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(

- Handle<StringType> data,

- int start_position,

- int end_position) {

- ASSERT(!data.is_null());

- raw_data_ = data->resource()->data();

- ASSERT(end_position <= data->length());

- if (start_position > 0) {

- SeekForward(start_position);

- }

- end_ =

- end_position != Scanner::kNoEndPosition ? end_position : data->length();

-template <typename StringType, typename CharType>

-uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {

- if (pos_ < end_) {

- return raw_data_[pos_++];

- } else {

- // note: currently the following increment is necessary to avoid a

- // test-parser problem!

- pos_++;

- return static_cast<uc32>(-1);

- }

-template <typename StringType, typename CharType>

-void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {

- pos_--;

- ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);

- ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);

-template <typename StringType, typename CharType>

-void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {

- pos_ = pos;

// ----------------------------------------------------------------------------

// Scanner::LiteralScope

@@ -309,7 +248,7 @@

}

-void Scanner::AddChar(uc32 c) {

+void Scanner::AddLiteralChar(uc32 c) {

literal_buffer_.AddChar(c);

}

@@ -324,8 +263,8 @@

}

-void Scanner::AddCharAdvance() {

- AddChar(c0_);

+void Scanner::AddLiteralCharAdvance() {

+ AddLiteralChar(c0_);

Advance();

}

@@ -358,9 +297,9 @@

while (true) {

// We treat byte-order marks (BOMs) as whitespace for better

// compatibility with Spidermonkey and other JavaScript engines.

- while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

+ while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

// IsWhiteSpace() includes line terminators!

- if (kIsLineTerminator.get(c0_)) {

+ if (ScannerConstants::kIsLineTerminator.get(c0_)) {

// Ignore line terminators, but remember them. This is necessary

// for automatic semicolon insertion.

has_line_terminator_before_next_ = true;

@@ -400,7 +339,7 @@

// separately by the lexical grammar and becomes part of the

// stream of input elements for the syntactic grammar (see

// ECMA-262, section 7.4, page 12).

- while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

+ while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

Advance();

}

@@ -537,29 +476,29 @@

// Check for control character (0x00-0x1f) or unterminated string (<0).

if (c0_ < 0x20) return Token::ILLEGAL;

if (c0_ != '\\') {

- AddCharAdvance();

+ AddLiteralCharAdvance();

} else {

Advance();

switch (c0_) {

case '"':

case '\\':

case '/':

- AddChar(c0_);

+ AddLiteralChar(c0_);

break;

case 'b':

- AddChar('\x08');

+ AddLiteralChar('\x08');

break;

case 'f':

- AddChar('\x0c');

+ AddLiteralChar('\x0c');

break;

case 'n':

- AddChar('\x0a');

+ AddLiteralChar('\x0a');

break;

case 'r':

- AddChar('\x0d');

+ AddLiteralChar('\x0d');

break;

case 't':

- AddChar('\x09');

+ AddLiteralChar('\x09');

break;

case 'u': {

uc32 value = 0;

@@ -571,7 +510,7 @@

}

value = value * 16 + digit;

}

- AddChar(value);

+ AddLiteralChar(value);

break;

}

default:

@@ -591,31 +530,31 @@

Token::Value Scanner::ScanJsonNumber() {

LiteralScope literal(this);

- if (c0_ == '-') AddCharAdvance();

+ if (c0_ == '-') AddLiteralCharAdvance();

if (c0_ == '0') {

- AddCharAdvance();

+ AddLiteralCharAdvance();

// Prefix zero is only allowed if it's the only digit before

// a decimal point or exponent.

if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;

} else {

if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;

do {

- AddCharAdvance();

+ AddLiteralCharAdvance();

} while (c0_ >= '0' && c0_ <= '9');

}

if (c0_ == '.') {

- AddCharAdvance();

+ AddLiteralCharAdvance();

if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;

do {

- AddCharAdvance();

+ AddLiteralCharAdvance();

} while (c0_ >= '0' && c0_ <= '9');

}

if (AsciiAlphaToLower(c0_) == 'e') {

- AddCharAdvance();

- if (c0_ == '-' || c0_ == '+') AddCharAdvance();

+ AddLiteralCharAdvance();

+ if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();

if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;

do {

- AddCharAdvance();

+ AddLiteralCharAdvance();

} while (c0_ >= '0' && c0_ <= '9');

}

literal.Complete();

@@ -631,7 +570,7 @@

Advance();

text++;

}

- if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;

+ if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;

literal.Complete();

return token;

}

@@ -854,7 +793,7 @@

break;

default:

- if (kIsIdentifierStart.get(c0_)) {

+ if (ScannerConstants::kIsIdentifierStart.get(c0_)) {

token = ScanIdentifier();

} else if (IsDecimalDigit(c0_)) {

token = ScanNumber(false);

@@ -937,7 +876,7 @@

Advance();

// Skip escaped newlines.

- if (kIsLineTerminator.get(c)) {

+ if (ScannerConstants::kIsLineTerminator.get(c)) {

// Allow CR+LF newlines in multiline string literals.

if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

// Allow LF+CR newlines in multiline string literals.

@@ -970,7 +909,7 @@

// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these

// should be illegal, but they are commonly handled

// as non-escaped characters by JS VMs.

- AddChar(c);

+ AddLiteralChar(c);

}

@@ -979,14 +918,15 @@

Advance(); // consume quote

LiteralScope literal(this);

- while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

+ while (c0_ != quote && c0_ >= 0

+ && !ScannerConstants::kIsLineTerminator.get(c0_)) {

uc32 c = c0_;

Advance();

if (c == '\\') {

if (c0_ < 0) return Token::ILLEGAL;

ScanEscape();

} else {

- AddChar(c);

+ AddLiteralChar(c);

}

if (c0_ != quote) return Token::ILLEGAL;

@@ -1017,7 +957,7 @@

// Scans consecutive decimal digits, adding each one to the current literal.

void Scanner::ScanDecimalDigits() {

while (IsDecimalDigit(c0_))

- AddCharAdvance();

+ AddLiteralCharAdvance();

}

@@ -1029,25 +969,25 @@

LiteralScope literal(this);

if (seen_period) {

// we have already seen a decimal point of the float

- AddChar('.');

+ AddLiteralChar('.');

ScanDecimalDigits(); // we know we have at least one digit

} else {

// if the first character is '0' we must check for octals and hex

if (c0_ == '0') {

- AddCharAdvance();

+ AddLiteralCharAdvance();

// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number

if (c0_ == 'x' || c0_ == 'X') {

// hex number

kind = HEX;

- AddCharAdvance();

+ AddLiteralCharAdvance();

if (!IsHexDigit(c0_)) {

// we must have at least one hex digit after 'x'/'X'

return Token::ILLEGAL;

}

while (IsHexDigit(c0_)) {

- AddCharAdvance();

+ AddLiteralCharAdvance();

}

} else if ('0' <= c0_ && c0_ <= '7') {

// (possible) octal number

@@ -1058,7 +998,7 @@

break;

}

if (c0_ < '0' || '7' < c0_) break;

- AddCharAdvance();

+ AddLiteralCharAdvance();

}

@@ -1067,7 +1007,7 @@

if (kind == DECIMAL) {

ScanDecimalDigits(); // optional

if (c0_ == '.') {

- AddCharAdvance();

+ AddLiteralCharAdvance();

ScanDecimalDigits(); // optional

}

@@ -1078,9 +1018,9 @@

ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number

if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed

// scan exponent

- AddCharAdvance();

+ AddLiteralCharAdvance();

if (c0_ == '+' || c0_ == '-')

- AddCharAdvance();

+ AddLiteralCharAdvance();

if (!IsDecimalDigit(c0_)) {

// we must have at least one decimal digit after 'e'/'E'

return Token::ILLEGAL;

@@ -1092,7 +1032,7 @@

// not be an identifier start or a decimal digit; see ECMA-262

// section 7.8.3, page 17 (note that we read only one decimal digit

// if the value is 0).

- if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))

+ if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))

return Token::ILLEGAL;

literal.Complete();

@@ -1114,7 +1054,7 @@

Token::Value Scanner::ScanIdentifier() {

- ASSERT(kIsIdentifierStart.get(c0_));

+ ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));

LiteralScope literal(this);

KeywordMatcher keyword_match;

@@ -1123,25 +1063,25 @@

if (c0_ == '\\') {

uc32 c = ScanIdentifierUnicodeEscape();

// Only allow legal identifier start characters.

- if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;

- AddChar(c);

+ if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;

+ AddLiteralChar(c);

keyword_match.Fail();

} else {

- AddChar(c0_);

+ AddLiteralChar(c0_);

keyword_match.AddChar(c0_);

Advance();

}

// Scan the rest of the identifier characters.

- while (kIsIdentifierPart.get(c0_)) {

+ while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

if (c0_ == '\\') {

uc32 c = ScanIdentifierUnicodeEscape();

// Only allow legal identifier part characters.

- if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;

- AddChar(c);

+ if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;

+ AddLiteralChar(c);

keyword_match.Fail();

} else {

- AddChar(c0_);

+ AddLiteralChar(c0_);

keyword_match.AddChar(c0_);

Advance();

}

@@ -1153,17 +1093,6 @@

-bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

- // Checks whether the buffer contains an identifier (no escape).

- if (!buffer->has_more()) return false;

- if (!kIsIdentifierStart.get(buffer->GetNext())) return false;

- while (buffer->has_more()) {

- if (!kIsIdentifierPart.get(buffer->GetNext())) return false;

- }

- return true;

bool Scanner::ScanRegExpPattern(bool seen_equal) {

// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

bool in_character_class = false;

@@ -1178,18 +1107,18 @@

// constructor.

LiteralScope literal(this);

if (seen_equal)

- AddChar('=');

+ AddLiteralChar('=');

while (c0_ != '/' || in_character_class) {

- if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;

+ if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

if (c0_ == '\\') { // escaped character

- AddCharAdvance();

- if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;

- AddCharAdvance();

+ AddLiteralCharAdvance();

+ if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

+ AddLiteralCharAdvance();

} else { // unescaped character

if (c0_ == '[') in_character_class = true;

if (c0_ == ']') in_character_class = false;

- AddCharAdvance();

+ AddLiteralCharAdvance();

}

Advance(); // consume '/'

@@ -1202,17 +1131,17 @@

bool Scanner::ScanRegExpFlags() {

// Scan regular expression flags.

LiteralScope literal(this);

- while (kIsIdentifierPart.get(c0_)) {

+ while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

if (c0_ == '\\') {

uc32 c = ScanIdentifierUnicodeEscape();

if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

// We allow any escaped character, unlike the restriction on

// IdentifierPart when it is used to build an IdentifierName.

- AddChar(c);

+ AddLiteralChar(c);

continue;

}

- AddCharAdvance();

+ AddLiteralCharAdvance();

}

literal.Complete();

« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »