Index: src/scanner.cc |
diff --git a/src/scanner.cc b/src/scanner.cc |
old mode 100644 |
new mode 100755 |
index 0d3b789f9b31a6a501748bbb4b3dd0ae61bdd755..cf7e49f850f6d3203677d0cfe033d24df2ffd5fa |
--- a/src/scanner.cc |
+++ b/src/scanner.cc |
@@ -323,11 +323,14 @@ void KeywordMatcher::Step(uc32 input) { |
// ---------------------------------------------------------------------------- |
// Scanner |
-Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } |
+Scanner::Scanner(ParserMode pre) |
+ : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { } |
-void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
- int position) { |
+void Scanner::Init(Handle<String> source, |
+ unibrow::CharacterStream* stream, |
+ int position, |
+ ParserLanguage language) { |
// Initialize the source buffer. |
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
two_byte_string_buffer_.Initialize( |
@@ -339,6 +342,7 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
} |
position_ = position; |
+ is_parsing_json_ = (language == JSON); |
// Set c0_ (one character ahead) |
ASSERT(kCharacterLookaheadBufferSize == 1); |
@@ -416,7 +420,17 @@ static inline bool IsByteOrderMark(uc32 c) { |
} |
-bool Scanner::SkipWhiteSpace() { |
+bool Scanner::SkipJsonWhiteSpace() { |
+ int start_position = source_pos(); |
+ // JSON WhiteSpace is tab, carrige-return, newline and space. |
+ while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') { |
+ Advance(); |
+ } |
+ return source_pos() != start_position; |
+} |
+ |
+ |
+bool Scanner::SkipJavaScriptWhiteSpace() { |
int start_position = source_pos(); |
while (true) { |
@@ -512,7 +526,194 @@ Token::Value Scanner::ScanHtmlComment() { |
} |
-void Scanner::Scan() { |
+ |
+void Scanner::ScanJson() { |
+ next_.literal_buffer = NULL; |
+ Token::Value token; |
+ has_line_terminator_before_next_ = false; |
+ do { |
+ // Remember the position of the next token |
+ next_.location.beg_pos = source_pos(); |
+ switch (c0_) { |
+ case '\t': |
+ case '\r': |
+ case '\n': |
+ case ' ': |
+ Advance(); |
+ token = Token::WHITESPACE; |
+ break; |
+ case '{': |
+ Advance(); |
+ token = Token::LBRACE; |
+ break; |
+ case '}': |
+ Advance(); |
+ token = Token::RBRACE; |
+ break; |
+ case '[': |
+ Advance(); |
+ token = Token::LBRACK; |
+ break; |
+ case ']': |
+ Advance(); |
+ token = Token::RBRACK; |
+ break; |
+ case ':': |
+ Advance(); |
+ token = Token::COLON; |
+ break; |
+ case ',': |
+ Advance(); |
+ token = Token::COMMA; |
+ break; |
+ case '"': |
+ token = ScanJsonString(); |
+ break; |
+ case '-': |
+ case '0': |
+ case '1': |
+ case '2': |
+ case '3': |
+ case '4': |
+ case '5': |
+ case '6': |
+ case '7': |
+ case '8': |
+ case '9': |
+ token = ScanJsonNumber(); |
+ break; |
+ case 't': |
+ token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); |
+ break; |
+ case 'f': |
+ token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); |
+ break; |
+ case 'n': |
+ token = ScanJsonIdentifier("null", Token::NULL_LITERAL); |
+ break; |
+ default: |
+ if (c0_ < 0) { |
+ Advance(); |
+ token = Token::EOS; |
+ } else { |
+ Advance(); |
+ token = Select(Token::ILLEGAL); |
+ } |
+ } |
+ } while (token == Token::WHITESPACE); |
+ |
+ next_.location.end_pos = source_pos(); |
+ next_.token = token; |
+} |
+ |
+ |
+Token::Value Scanner::ScanJsonString() { |
+ ASSERT_EQ('"', c0_); |
+ Advance(); |
+ StartLiteral(); |
+ while (c0_ != '"' && c0_ > 0) { |
+ // Check for control character (0x00-0x1f) or unterminated string (<0). |
+ if (c0_ < 0x20) return Token::ILLEGAL; |
+ if (c0_ != '\\') { |
+ AddCharAdvance(); |
+ } else { |
+ Advance(); |
+ switch (c0_) { |
+ case '"': |
+ case '\\': |
+ case '/': |
+ AddChar(c0_); |
+ break; |
+ case 'b': |
+ AddChar('\x08'); |
+ break; |
+ case 'f': |
+ AddChar('\x0c'); |
+ break; |
+ case 'n': |
+ AddChar('\x0a'); |
+ break; |
+ case 'r': |
+ AddChar('\x0d'); |
+ break; |
+ case 't': |
+ AddChar('\x09'); |
+ break; |
+ case 'u': { |
+ uc32 value = 0; |
+ for (int i = 0; i < 4; i++) { |
+ Advance(); |
+ int digit = HexValue(c0_); |
+ if (digit < 0) return Token::ILLEGAL; |
+ value = value * 16 + digit; |
+ } |
+ AddChar(value); |
+ break; |
+ } |
+ default: |
+ return Token::ILLEGAL; |
+ } |
+ Advance(); |
+ } |
+ } |
+ if (c0_ != '"') { |
+ return Token::ILLEGAL; |
+ } |
+ TerminateLiteral(); |
+ Advance(); |
+ return Token::STRING; |
+} |
+ |
+ |
+Token::Value Scanner::ScanJsonNumber() { |
+ StartLiteral(); |
+ if (c0_ == '-') AddCharAdvance(); |
+ if (c0_ == '0') { |
+ AddCharAdvance(); |
+ // Prefix zero is only allowed if it's the only digit before |
+ // a decimal point or exponent. |
+ if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; |
+ } else { |
+ if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; |
+ do { |
+ AddCharAdvance(); |
+ } while (c0_ >= '0' && c0_ <= '9'); |
+ } |
+ if (c0_ == '.') { |
+ AddCharAdvance(); |
+ if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
+ do { |
+ AddCharAdvance(); |
+ } while (c0_ >= '0' && c0_ <= '9'); |
+ } |
+ if ((c0_ | 0x20) == 'e') { |
+ AddCharAdvance(); |
+ if (c0_ == '-' || c0_ == '+') AddCharAdvance(); |
+ if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
Rico
2010/02/01 10:40:58
According to spec this can actually be 0 (Exponent
Lasse Reichstein
2010/02/01 12:17:30
This code does allow a zero digit after the "e".
A
|
+ do { |
+ AddCharAdvance(); |
+ } while (c0_ >= '0' && c0_ <= '9'); |
+ } |
+ TerminateLiteral(); |
+ return Token::NUMBER; |
+} |
+ |
+ |
+Token::Value Scanner::ScanJsonIdentifier(const char* text, |
+ Token::Value token) { |
+ StartLiteral(); |
+ while (*text != '\0') { |
+ if (c0_ != *text) return Token::ILLEGAL; |
+ Advance(); |
+ text++; |
+ } |
+ if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
+ TerminateLiteral(); |
+ return token; |
+} |
+ |
+ |
+void Scanner::ScanJavaScript() { |
next_.literal_buffer = NULL; |
Token::Value token; |
has_line_terminator_before_next_ = false; |