Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1667)

Unified Diff: src/scanner.cc

Issue 5188006: Push version 2.5.7 to trunk.... (Closed) Base URL: http://v8.googlecode.com/svn/trunk/
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/scanner.cc
===================================================================
--- src/scanner.cc (revision 5846)
+++ src/scanner.cc (working copy)
@@ -30,27 +30,15 @@
#include "ast.h"
#include "handles.h"
#include "scanner.h"
+#include "unicode-inl.h"
namespace v8 {
namespace internal {
// ----------------------------------------------------------------------------
-// Character predicates
-
-
-unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
-unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
-unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
-unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
-
-
-StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
-
-
-// ----------------------------------------------------------------------------
// UTF8Buffer
-UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
+UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }
UTF8Buffer::~UTF8Buffer() {}
@@ -135,55 +123,6 @@
}
-// ExternalStringUTF16Buffer
-template <typename StringType, typename CharType>
-ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
- : raw_data_(NULL) { }
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
- Handle<StringType> data,
- int start_position,
- int end_position) {
- ASSERT(!data.is_null());
- raw_data_ = data->resource()->data();
-
- ASSERT(end_position <= data->length());
- if (start_position > 0) {
- SeekForward(start_position);
- }
- end_ =
- end_position != Scanner::kNoEndPosition ? end_position : data->length();
-}
-
-
-template <typename StringType, typename CharType>
-uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
- if (pos_ < end_) {
- return raw_data_[pos_++];
- } else {
- // note: currently the following increment is necessary to avoid a
- // test-parser problem!
- pos_++;
- return static_cast<uc32>(-1);
- }
-}
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
- pos_--;
- ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
- ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
-}
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
- pos_ = pos;
-}
-
// ----------------------------------------------------------------------------
// Scanner::LiteralScope
@@ -309,7 +248,7 @@
}
-void Scanner::AddChar(uc32 c) {
+void Scanner::AddLiteralChar(uc32 c) {
literal_buffer_.AddChar(c);
}
@@ -324,8 +263,8 @@
}
-void Scanner::AddCharAdvance() {
- AddChar(c0_);
+void Scanner::AddLiteralCharAdvance() {
+ AddLiteralChar(c0_);
Advance();
}
@@ -358,9 +297,9 @@
while (true) {
// We treat byte-order marks (BOMs) as whitespace for better
// compatibility with Spidermonkey and other JavaScript engines.
- while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
+ while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
// IsWhiteSpace() includes line terminators!
- if (kIsLineTerminator.get(c0_)) {
+ if (ScannerConstants::kIsLineTerminator.get(c0_)) {
// Ignore line terminators, but remember them. This is necessary
// for automatic semicolon insertion.
has_line_terminator_before_next_ = true;
@@ -400,7 +339,7 @@
// separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar (see
// ECMA-262, section 7.4, page 12).
- while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
+ while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
Advance();
}
@@ -537,29 +476,29 @@
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL;
if (c0_ != '\\') {
- AddCharAdvance();
+ AddLiteralCharAdvance();
} else {
Advance();
switch (c0_) {
case '"':
case '\\':
case '/':
- AddChar(c0_);
+ AddLiteralChar(c0_);
break;
case 'b':
- AddChar('\x08');
+ AddLiteralChar('\x08');
break;
case 'f':
- AddChar('\x0c');
+ AddLiteralChar('\x0c');
break;
case 'n':
- AddChar('\x0a');
+ AddLiteralChar('\x0a');
break;
case 'r':
- AddChar('\x0d');
+ AddLiteralChar('\x0d');
break;
case 't':
- AddChar('\x09');
+ AddLiteralChar('\x09');
break;
case 'u': {
uc32 value = 0;
@@ -571,7 +510,7 @@
}
value = value * 16 + digit;
}
- AddChar(value);
+ AddLiteralChar(value);
break;
}
default:
@@ -591,31 +530,31 @@
Token::Value Scanner::ScanJsonNumber() {
LiteralScope literal(this);
- if (c0_ == '-') AddCharAdvance();
+ if (c0_ == '-') AddLiteralCharAdvance();
if (c0_ == '0') {
- AddCharAdvance();
+ AddLiteralCharAdvance();
// Prefix zero is only allowed if it's the only digit before
// a decimal point or exponent.
if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
} else {
if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
do {
- AddCharAdvance();
+ AddLiteralCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
if (c0_ == '.') {
- AddCharAdvance();
+ AddLiteralCharAdvance();
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
do {
- AddCharAdvance();
+ AddLiteralCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
if (AsciiAlphaToLower(c0_) == 'e') {
- AddCharAdvance();
- if (c0_ == '-' || c0_ == '+') AddCharAdvance();
+ AddLiteralCharAdvance();
+ if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
do {
- AddCharAdvance();
+ AddLiteralCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
literal.Complete();
@@ -631,7 +570,7 @@
Advance();
text++;
}
- if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
+ if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
literal.Complete();
return token;
}
@@ -854,7 +793,7 @@
break;
default:
- if (kIsIdentifierStart.get(c0_)) {
+ if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
token = ScanIdentifier();
} else if (IsDecimalDigit(c0_)) {
token = ScanNumber(false);
@@ -937,7 +876,7 @@
Advance();
// Skip escaped newlines.
- if (kIsLineTerminator.get(c)) {
+ if (ScannerConstants::kIsLineTerminator.get(c)) {
// Allow CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
// Allow LF+CR newlines in multiline string literals.
@@ -970,7 +909,7 @@
// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
// should be illegal, but they are commonly handled
// as non-escaped characters by JS VMs.
- AddChar(c);
+ AddLiteralChar(c);
}
@@ -979,14 +918,15 @@
Advance(); // consume quote
LiteralScope literal(this);
- while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
+ while (c0_ != quote && c0_ >= 0
+ && !ScannerConstants::kIsLineTerminator.get(c0_)) {
uc32 c = c0_;
Advance();
if (c == '\\') {
if (c0_ < 0) return Token::ILLEGAL;
ScanEscape();
} else {
- AddChar(c);
+ AddLiteralChar(c);
}
}
if (c0_ != quote) return Token::ILLEGAL;
@@ -1017,7 +957,7 @@
// Scans a (possibly empty) run of decimal digits, adding each one to the current literal. Returns nothing.
void Scanner::ScanDecimalDigits() {
while (IsDecimalDigit(c0_))
- AddCharAdvance();
+ AddLiteralCharAdvance();
}
@@ -1029,25 +969,25 @@
LiteralScope literal(this);
if (seen_period) {
// we have already seen a decimal point of the float
- AddChar('.');
+ AddLiteralChar('.');
ScanDecimalDigits(); // we know we have at least one digit
} else {
// if the first character is '0' we must check for octals and hex
if (c0_ == '0') {
- AddCharAdvance();
+ AddLiteralCharAdvance();
// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
if (c0_ == 'x' || c0_ == 'X') {
// hex number
kind = HEX;
- AddCharAdvance();
+ AddLiteralCharAdvance();
if (!IsHexDigit(c0_)) {
// we must have at least one hex digit after 'x'/'X'
return Token::ILLEGAL;
}
while (IsHexDigit(c0_)) {
- AddCharAdvance();
+ AddLiteralCharAdvance();
}
} else if ('0' <= c0_ && c0_ <= '7') {
// (possible) octal number
@@ -1058,7 +998,7 @@
break;
}
if (c0_ < '0' || '7' < c0_) break;
- AddCharAdvance();
+ AddLiteralCharAdvance();
}
}
}
@@ -1067,7 +1007,7 @@
if (kind == DECIMAL) {
ScanDecimalDigits(); // optional
if (c0_ == '.') {
- AddCharAdvance();
+ AddLiteralCharAdvance();
ScanDecimalDigits(); // optional
}
}
@@ -1078,9 +1018,9 @@
ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
// scan exponent
- AddCharAdvance();
+ AddLiteralCharAdvance();
if (c0_ == '+' || c0_ == '-')
- AddCharAdvance();
+ AddLiteralCharAdvance();
if (!IsDecimalDigit(c0_)) {
// we must have at least one decimal digit after 'e'/'E'
return Token::ILLEGAL;
@@ -1092,7 +1032,7 @@
// not be an identifier start or a decimal digit; see ECMA-262
// section 7.8.3, page 17 (note that we read only one decimal digit
// if the value is 0).
- if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
+ if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
return Token::ILLEGAL;
literal.Complete();
@@ -1114,7 +1054,7 @@
Token::Value Scanner::ScanIdentifier() {
- ASSERT(kIsIdentifierStart.get(c0_));
+ ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
LiteralScope literal(this);
KeywordMatcher keyword_match;
@@ -1123,25 +1063,25 @@
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters.
- if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
- AddChar(c);
+ if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
+ AddLiteralChar(c);
keyword_match.Fail();
} else {
- AddChar(c0_);
+ AddLiteralChar(c0_);
keyword_match.AddChar(c0_);
Advance();
}
// Scan the rest of the identifier characters.
- while (kIsIdentifierPart.get(c0_)) {
+ while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters.
- if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
- AddChar(c);
+ if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
+ AddLiteralChar(c);
keyword_match.Fail();
} else {
- AddChar(c0_);
+ AddLiteralChar(c0_);
keyword_match.AddChar(c0_);
Advance();
}
@@ -1153,17 +1093,6 @@
-bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
- // Checks whether the buffer contains an identifier (no escape).
- if (!buffer->has_more()) return false;
- if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
- while (buffer->has_more()) {
- if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
- }
- return true;
-}
-
-
bool Scanner::ScanRegExpPattern(bool seen_equal) {
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false;
@@ -1178,18 +1107,18 @@
// constructor.
LiteralScope literal(this);
if (seen_equal)
- AddChar('=');
+ AddLiteralChar('=');
while (c0_ != '/' || in_character_class) {
- if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
+ if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
if (c0_ == '\\') { // escaped character
- AddCharAdvance();
- if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
- AddCharAdvance();
+ AddLiteralCharAdvance();
+ if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
+ AddLiteralCharAdvance();
} else { // unescaped character
if (c0_ == '[') in_character_class = true;
if (c0_ == ']') in_character_class = false;
- AddCharAdvance();
+ AddLiteralCharAdvance();
}
}
Advance(); // consume '/'
@@ -1202,17 +1131,17 @@
bool Scanner::ScanRegExpFlags() {
// Scan regular expression flags.
LiteralScope literal(this);
- while (kIsIdentifierPart.get(c0_)) {
+ while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
// We allow any escaped character, unlike the restriction on
// IdentifierPart when it is used to build an IdentifierName.
- AddChar(c);
+ AddLiteralChar(c);
continue;
}
}
- AddCharAdvance();
+ AddLiteralCharAdvance();
}
literal.Complete();
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698