Index: src/scanner.cc |
=================================================================== |
--- src/scanner.cc (revision 5846) |
+++ src/scanner.cc (working copy) |
@@ -30,27 +30,15 @@ |
#include "ast.h" |
#include "handles.h" |
#include "scanner.h" |
+#include "unicode-inl.h" |
namespace v8 { |
namespace internal { |
// ---------------------------------------------------------------------------- |
-// Character predicates |
- |
- |
-unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; |
-unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; |
-unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; |
-unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; |
- |
- |
-StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; |
- |
- |
-// ---------------------------------------------------------------------------- |
// UTF8Buffer |
-UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } |
+UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { } |
UTF8Buffer::~UTF8Buffer() {} |
@@ -135,55 +123,6 @@ |
} |
-// ExternalStringUTF16Buffer |
-template <typename StringType, typename CharType> |
-ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() |
- : raw_data_(NULL) { } |
- |
- |
-template <typename StringType, typename CharType> |
-void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( |
- Handle<StringType> data, |
- int start_position, |
- int end_position) { |
- ASSERT(!data.is_null()); |
- raw_data_ = data->resource()->data(); |
- |
- ASSERT(end_position <= data->length()); |
- if (start_position > 0) { |
- SeekForward(start_position); |
- } |
- end_ = |
- end_position != Scanner::kNoEndPosition ? end_position : data->length(); |
-} |
- |
- |
-template <typename StringType, typename CharType> |
-uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { |
- if (pos_ < end_) { |
- return raw_data_[pos_++]; |
- } else { |
- // note: currently the following increment is necessary to avoid a |
- // test-parser problem! |
- pos_++; |
- return static_cast<uc32>(-1); |
- } |
-} |
- |
- |
-template <typename StringType, typename CharType> |
-void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { |
- pos_--; |
- ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
- ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
-} |
- |
- |
-template <typename StringType, typename CharType> |
-void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { |
- pos_ = pos; |
-} |
- |
// ---------------------------------------------------------------------------- |
// Scanner::LiteralScope |
@@ -309,7 +248,7 @@ |
} |
-void Scanner::AddChar(uc32 c) { |
+void Scanner::AddLiteralChar(uc32 c) { |
literal_buffer_.AddChar(c); |
} |
@@ -324,8 +263,8 @@ |
} |
-void Scanner::AddCharAdvance() { |
- AddChar(c0_); |
+void Scanner::AddLiteralCharAdvance() { |
+ AddLiteralChar(c0_); |
Advance(); |
} |
@@ -358,9 +297,9 @@ |
while (true) { |
// We treat byte-order marks (BOMs) as whitespace for better |
// compatibility with Spidermonkey and other JavaScript engines. |
- while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
+ while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
// IsWhiteSpace() includes line terminators! |
- if (kIsLineTerminator.get(c0_)) { |
+ if (ScannerConstants::kIsLineTerminator.get(c0_)) { |
// Ignore line terminators, but remember them. This is necessary |
// for automatic semicolon insertion. |
has_line_terminator_before_next_ = true; |
@@ -400,7 +339,7 @@ |
// separately by the lexical grammar and becomes part of the |
// stream of input elements for the syntactic grammar (see |
// ECMA-262, section 7.4, page 12). |
- while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
+ while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
Advance(); |
} |
@@ -537,29 +476,29 @@ |
// Check for control character (0x00-0x1f) or unterminated string (<0). |
if (c0_ < 0x20) return Token::ILLEGAL; |
if (c0_ != '\\') { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} else { |
Advance(); |
switch (c0_) { |
case '"': |
case '\\': |
case '/': |
- AddChar(c0_); |
+ AddLiteralChar(c0_); |
break; |
case 'b': |
- AddChar('\x08'); |
+ AddLiteralChar('\x08'); |
break; |
case 'f': |
- AddChar('\x0c'); |
+ AddLiteralChar('\x0c'); |
break; |
case 'n': |
- AddChar('\x0a'); |
+ AddLiteralChar('\x0a'); |
break; |
case 'r': |
- AddChar('\x0d'); |
+ AddLiteralChar('\x0d'); |
break; |
case 't': |
- AddChar('\x09'); |
+ AddLiteralChar('\x09'); |
break; |
case 'u': { |
uc32 value = 0; |
@@ -571,7 +510,7 @@ |
} |
value = value * 16 + digit; |
} |
- AddChar(value); |
+ AddLiteralChar(value); |
break; |
} |
default: |
@@ -591,31 +530,31 @@ |
Token::Value Scanner::ScanJsonNumber() { |
LiteralScope literal(this); |
- if (c0_ == '-') AddCharAdvance(); |
+ if (c0_ == '-') AddLiteralCharAdvance(); |
if (c0_ == '0') { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
// Prefix zero is only allowed if it's the only digit before |
// a decimal point or exponent. |
if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; |
} else { |
if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; |
do { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} while (c0_ >= '0' && c0_ <= '9'); |
} |
if (c0_ == '.') { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
do { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} while (c0_ >= '0' && c0_ <= '9'); |
} |
if (AsciiAlphaToLower(c0_) == 'e') { |
- AddCharAdvance(); |
- if (c0_ == '-' || c0_ == '+') AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
+ if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance(); |
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
do { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} while (c0_ >= '0' && c0_ <= '9'); |
} |
literal.Complete(); |
@@ -631,7 +570,7 @@ |
Advance(); |
text++; |
} |
- if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
+ if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
literal.Complete(); |
return token; |
} |
@@ -854,7 +793,7 @@ |
break; |
default: |
- if (kIsIdentifierStart.get(c0_)) { |
+ if (ScannerConstants::kIsIdentifierStart.get(c0_)) { |
token = ScanIdentifier(); |
} else if (IsDecimalDigit(c0_)) { |
token = ScanNumber(false); |
@@ -937,7 +876,7 @@ |
Advance(); |
// Skip escaped newlines. |
- if (kIsLineTerminator.get(c)) { |
+ if (ScannerConstants::kIsLineTerminator.get(c)) { |
// Allow CR+LF newlines in multiline string literals. |
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
// Allow LF+CR newlines in multiline string literals. |
@@ -970,7 +909,7 @@ |
// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
// should be illegal, but they are commonly handled |
// as non-escaped characters by JS VMs. |
- AddChar(c); |
+ AddLiteralChar(c); |
} |
@@ -979,14 +918,15 @@ |
Advance(); // consume quote |
LiteralScope literal(this); |
- while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
+ while (c0_ != quote && c0_ >= 0 |
+ && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
uc32 c = c0_; |
Advance(); |
if (c == '\\') { |
if (c0_ < 0) return Token::ILLEGAL; |
ScanEscape(); |
} else { |
- AddChar(c); |
+ AddLiteralChar(c); |
} |
} |
if (c0_ != quote) return Token::ILLEGAL; |
@@ -1017,7 +957,7 @@ |
// Returns true if any decimal digits were scanned, returns false otherwise. |
void Scanner::ScanDecimalDigits() { |
while (IsDecimalDigit(c0_)) |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} |
@@ -1029,25 +969,25 @@ |
LiteralScope literal(this); |
if (seen_period) { |
// we have already seen a decimal point of the float |
- AddChar('.'); |
+ AddLiteralChar('.'); |
ScanDecimalDigits(); // we know we have at least one digit |
} else { |
// if the first character is '0' we must check for octals and hex |
if (c0_ == '0') { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number |
if (c0_ == 'x' || c0_ == 'X') { |
// hex number |
kind = HEX; |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
if (!IsHexDigit(c0_)) { |
// we must have at least one hex digit after 'x'/'X' |
return Token::ILLEGAL; |
} |
while (IsHexDigit(c0_)) { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} |
} else if ('0' <= c0_ && c0_ <= '7') { |
// (possible) octal number |
@@ -1058,7 +998,7 @@ |
break; |
} |
if (c0_ < '0' || '7' < c0_) break; |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} |
} |
} |
@@ -1067,7 +1007,7 @@ |
if (kind == DECIMAL) { |
ScanDecimalDigits(); // optional |
if (c0_ == '.') { |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
ScanDecimalDigits(); // optional |
} |
} |
@@ -1078,9 +1018,9 @@ |
ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed |
// scan exponent |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
if (c0_ == '+' || c0_ == '-') |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
if (!IsDecimalDigit(c0_)) { |
// we must have at least one decimal digit after 'e'/'E' |
return Token::ILLEGAL; |
@@ -1092,7 +1032,7 @@ |
// not be an identifier start or a decimal digit; see ECMA-262 |
// section 7.8.3, page 17 (note that we read only one decimal digit |
// if the value is 0). |
- if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) |
+ if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) |
return Token::ILLEGAL; |
literal.Complete(); |
@@ -1114,7 +1054,7 @@ |
Token::Value Scanner::ScanIdentifier() { |
- ASSERT(kIsIdentifierStart.get(c0_)); |
+ ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
LiteralScope literal(this); |
KeywordMatcher keyword_match; |
@@ -1123,25 +1063,25 @@ |
if (c0_ == '\\') { |
uc32 c = ScanIdentifierUnicodeEscape(); |
// Only allow legal identifier start characters. |
- if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
- AddChar(c); |
+ if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
+ AddLiteralChar(c); |
keyword_match.Fail(); |
} else { |
- AddChar(c0_); |
+ AddLiteralChar(c0_); |
keyword_match.AddChar(c0_); |
Advance(); |
} |
// Scan the rest of the identifier characters. |
- while (kIsIdentifierPart.get(c0_)) { |
+ while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
if (c0_ == '\\') { |
uc32 c = ScanIdentifierUnicodeEscape(); |
// Only allow legal identifier part characters. |
- if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
- AddChar(c); |
+ if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
+ AddLiteralChar(c); |
keyword_match.Fail(); |
} else { |
- AddChar(c0_); |
+ AddLiteralChar(c0_); |
keyword_match.AddChar(c0_); |
Advance(); |
} |
@@ -1153,17 +1093,6 @@ |
-bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
- // Checks whether the buffer contains an identifier (no escape). |
- if (!buffer->has_more()) return false; |
- if (!kIsIdentifierStart.get(buffer->GetNext())) return false; |
- while (buffer->has_more()) { |
- if (!kIsIdentifierPart.get(buffer->GetNext())) return false; |
- } |
- return true; |
-} |
- |
- |
bool Scanner::ScanRegExpPattern(bool seen_equal) { |
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
bool in_character_class = false; |
@@ -1178,18 +1107,18 @@ |
// constructor. |
LiteralScope literal(this); |
if (seen_equal) |
- AddChar('='); |
+ AddLiteralChar('='); |
while (c0_ != '/' || in_character_class) { |
- if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
+ if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
if (c0_ == '\\') { // escaped character |
- AddCharAdvance(); |
- if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
+ if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
+ AddLiteralCharAdvance(); |
} else { // unescaped character |
if (c0_ == '[') in_character_class = true; |
if (c0_ == ']') in_character_class = false; |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} |
} |
Advance(); // consume '/' |
@@ -1202,17 +1131,17 @@ |
bool Scanner::ScanRegExpFlags() { |
// Scan regular expression flags. |
LiteralScope literal(this); |
- while (kIsIdentifierPart.get(c0_)) { |
+ while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
if (c0_ == '\\') { |
uc32 c = ScanIdentifierUnicodeEscape(); |
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
// We allow any escaped character, unlike the restriction on |
// IdentifierPart when it is used to build an IdentifierName. |
- AddChar(c); |
+ AddLiteralChar(c); |
continue; |
} |
} |
- AddCharAdvance(); |
+ AddLiteralCharAdvance(); |
} |
literal.Complete(); |