Chromium Code Reviews
| Index: src/json-parser.cc |
| =================================================================== |
| --- src/json-parser.cc (revision 8119) |
| +++ src/json-parser.cc (working copy) |
| @@ -53,50 +53,43 @@ |
| // Set initial position right before the string. |
| position_ = -1; |
| // Advance to the first character (possibly EOS) |
| - Advance(); |
| - Next(); |
| + AdvanceWS(); |
| Handle<Object> result = ParseJsonValue(); |
| - if (result.is_null() || Next() != Token::EOS) { |
| - // Parse failed. Scanner's current token is the unexpected token. |
| - Token::Value token = current_.token; |
| + if (result.is_null() || c0_ != kEndOfString) { |
| + // Parse failed. Current character is the unexpected token. |
| const char* message; |
| - const char* name_opt = NULL; |
| - switch (token) { |
| - case Token::EOS: |
| + switch (c0_) { |
| + case kEndOfString: |
| message = "unexpected_eos"; |
| break; |
| - case Token::NUMBER: |
| + case '-': |
| + case '0': |
| + case '1': |
| + case '2': |
| + case '3': |
| + case '4': |
| + case '5': |
| + case '6': |
| + case '7': |
| + case '8': |
| + case '9': |
| message = "unexpected_token_number"; |
| break; |
| - case Token::STRING: |
| + case '"': |
| message = "unexpected_token_string"; |
|
Lasse Reichstein
2011/06/01 11:03:58
This might change the behavior for unterminated st
sandholm
2011/06/01 13:45:20
Done.
|
| break; |
| - case Token::IDENTIFIER: |
| - case Token::FUTURE_RESERVED_WORD: |
| - message = "unexpected_token_identifier"; |
| - break; |
| default: |
|
Lasse Reichstein
2011/06/01 11:03:58
Agree, it's probably better to not have the unexpe
sandholm
2011/06/01 13:45:20
Done.
|
| message = "unexpected_token"; |
|
Lasse Reichstein
2011/06/01 11:03:58
The unexpected_token message needs/expects second
sandholm
2011/06/01 13:45:20
Done.
|
| - name_opt = Token::String(token); |
| - ASSERT(name_opt != NULL); |
| break; |
| } |
| Factory* factory = isolate()->factory(); |
| MessageLocation location(factory->NewScript(source), |
| - current_.beg_pos, |
| - current_.end_pos); |
| - Handle<JSArray> array; |
| - if (name_opt == NULL) { |
| - array = factory->NewJSArray(0); |
| - } else { |
| - Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt)); |
| - Handle<FixedArray> element = factory->NewFixedArray(1); |
| - element->set(0, *name); |
| - array = factory->NewJSArrayWithElements(element); |
| - } |
| + position_, |
| + position_ + 1); |
| + Handle<JSArray> array = factory->NewJSArray(0); |
| Handle<Object> result = factory->NewSyntaxError(message, array); |
| isolate()->Throw(*result, &location); |
| return Handle<Object>::null(); |
| @@ -107,21 +100,48 @@ |
| // Parse any JSON value. |
| Handle<Object> JsonParser::ParseJsonValue() { |
| - Token::Value token = Next(); |
| - switch (token) { |
| - case Token::STRING: |
| - return GetString(false); |
| - case Token::NUMBER: |
| - return isolate()->factory()->NewNumber(number_); |
| - case Token::FALSE_LITERAL: |
| - return isolate()->factory()->false_value(); |
| - case Token::TRUE_LITERAL: |
| - return isolate()->factory()->true_value(); |
| - case Token::NULL_LITERAL: |
| - return isolate()->factory()->null_value(); |
| - case Token::LBRACE: |
| + switch (c0_) { |
| + case '"': |
| + return ParseJsonString(); |
| + case '-': |
| + case '0': |
| + case '1': |
| + case '2': |
| + case '3': |
| + case '4': |
| + case '5': |
| + case '6': |
| + case '7': |
| + case '8': |
| + case '9': |
| + return ParseJsonNumber(); |
| + case 'f': |
| + if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && |
| + AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { |
| + AdvanceWS(); |
|
Lasse Reichstein
2011/06/01 11:03:58
Don't use AdvanceWS here, just check for non-ident
sandholm
2011/06/01 13:45:20
I think this is fine. The invariant is that we're
|
| + return isolate()->factory()->false_value(); |
| + } else { |
| + return ReportUnexpectedToken(); |
|
Lasse Reichstein
2011/06/01 11:03:58
Maybe change the name, now that we don't use Token
sandholm
2011/06/01 13:45:20
Done.
|
| + } |
| + case 't': |
| + if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && |
| + AdvanceGetChar() == 'e') { |
| + AdvanceWS(); |
|
Lasse Reichstein
2011/06/01 11:03:58
As above.
sandholm
2011/06/01 13:45:20
ditto
|
| + return isolate()->factory()->true_value(); |
| + } else { |
| + return ReportUnexpectedToken(); |
| + } |
| + case 'n': |
| + if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && |
| + AdvanceGetChar() == 'l') { |
| + AdvanceWS(); |
|
Lasse Reichstein
2011/06/01 11:03:58
And again.
sandholm
2011/06/01 13:45:20
ditto
|
| + return isolate()->factory()->null_value(); |
| + } else { |
| + return ReportUnexpectedToken(); |
| + } |
| + case '{': |
| return ParseJsonObject(); |
| - case Token::LBRACK: |
| + case '[': |
| return ParseJsonArray(); |
| default: |
| return ReportUnexpectedToken(); |
| @@ -129,27 +149,21 @@ |
| } |
| -// Parse a JSON object. Scanner must be right after '{' token. |
| +// Parse a JSON object. Position must be right at '{'. |
| Handle<Object> JsonParser::ParseJsonObject() { |
| Handle<JSFunction> object_constructor( |
| isolate()->global_context()->object_function()); |
| Handle<JSObject> json_object = |
| isolate()->factory()->NewJSObject(object_constructor); |
| - if (Peek() == Token::RBRACE) { |
| - Next(); |
| - } else { |
| + AdvanceWS(); |
| + if (c0_ != '}') { |
| do { |
| - if (Next() != Token::STRING) { |
| - return ReportUnexpectedToken(); |
| - } |
| - Handle<String> key = GetString(true); |
| - if (Next() != Token::COLON) { |
| - return ReportUnexpectedToken(); |
| - } |
| - |
| + Handle<String> key = ParseJsonSymbol(); |
| + if (key.is_null() || c0_ != ':') return ReportUnexpectedToken(); |
| + AdvanceWS(); |
| Handle<Object> value = ParseJsonValue(); |
| - if (value.is_null()) return Handle<Object>::null(); |
| + if (value.is_null()) return ReportUnexpectedToken(); |
| uint32_t index; |
| if (key->AsArrayIndex(&index)) { |
| @@ -159,150 +173,45 @@ |
| } else { |
| SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); |
| } |
| - } while (Next() == Token::COMMA); |
| - if (current_.token != Token::RBRACE) { |
| + } while (c0_ == ',' && AdvanceWS()); |
|
Lasse Reichstein
2011/06/01 11:03:58
Ah, so that's why AdvanceWS returns true.
Don't do
sandholm
2011/06/01 13:45:20
Fixed with a AdvanceWhiteSpacesOnlyIfMatch(',') ca
|
| + if (c0_ != '}') { |
| return ReportUnexpectedToken(); |
| } |
| } |
| + AdvanceWS(); |
| return json_object; |
| } |
| -// Parse a JSON array. Scanner must be right after '[' token. |
| +// Parse a JSON array. Position must be right at '['. |
| Handle<Object> JsonParser::ParseJsonArray() { |
|
Lasse Reichstein
2011/06/01 11:03:58
If position must be at '[', do
ASSERT_EQ(c0_, '[
sandholm
2011/06/01 13:45:20
Done.
|
| ZoneScope zone_scope(isolate(), DELETE_ON_EXIT); |
| ZoneList<Handle<Object> > elements(4); |
| - Token::Value token = Peek(); |
| - if (token == Token::RBRACK) { |
| - Next(); |
| - } else { |
| + AdvanceWS(); |
| + if (c0_ != ']') { |
| do { |
| Handle<Object> element = ParseJsonValue(); |
| - if (element.is_null()) return Handle<Object>::null(); |
| + if (element.is_null()) return ReportUnexpectedToken(); |
| elements.Add(element); |
| - token = Next(); |
| - } while (token == Token::COMMA); |
| - if (token != Token::RBRACK) { |
| + } while (c0_ == ',' && AdvanceWS()); |
| + if (c0_ != ']') { |
| return ReportUnexpectedToken(); |
| } |
| } |
| - |
| + AdvanceWS(); |
| // Allocate a fixed array with all the elements. |
| Handle<FixedArray> fast_elements = |
| isolate()->factory()->NewFixedArray(elements.length()); |
| - |
| for (int i = 0, n = elements.length(); i < n; i++) { |
| fast_elements->set(i, *elements[i]); |
| } |
| - |
| return isolate()->factory()->NewJSArrayWithElements(fast_elements); |
| } |
| -Token::Value JsonParser::Next() { |
| - current_ = next_; |
| - ScanJson(); |
| - return current_.token; |
| -} |
| - |
| -void JsonParser::ScanJson() { |
| - if (source_->IsSeqAsciiString()) { |
| - is_sequential_ascii_ = true; |
| - } else { |
| - is_sequential_ascii_ = false; |
| - } |
| - |
| - Token::Value token; |
| - do { |
| - // Remember the position of the next token |
| - next_.beg_pos = position_; |
| - switch (c0_) { |
| - case '\t': |
| - case '\r': |
| - case '\n': |
| - case ' ': |
| - Advance(); |
| - token = Token::WHITESPACE; |
| - break; |
| - case '{': |
| - Advance(); |
| - token = Token::LBRACE; |
| - break; |
| - case '}': |
| - Advance(); |
| - token = Token::RBRACE; |
| - break; |
| - case '[': |
| - Advance(); |
| - token = Token::LBRACK; |
| - break; |
| - case ']': |
| - Advance(); |
| - token = Token::RBRACK; |
| - break; |
| - case ':': |
| - Advance(); |
| - token = Token::COLON; |
| - break; |
| - case ',': |
| - Advance(); |
| - token = Token::COMMA; |
| - break; |
| - case '"': |
| - token = ScanJsonString(); |
| - break; |
| - case '-': |
| - case '0': |
| - case '1': |
| - case '2': |
| - case '3': |
| - case '4': |
| - case '5': |
| - case '6': |
| - case '7': |
| - case '8': |
| - case '9': |
| - token = ScanJsonNumber(); |
| - break; |
| - case 't': |
| - token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); |
| - break; |
| - case 'f': |
| - token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); |
| - break; |
| - case 'n': |
| - token = ScanJsonIdentifier("null", Token::NULL_LITERAL); |
| - break; |
| - default: |
| - if (c0_ < 0) { |
| - Advance(); |
| - token = Token::EOS; |
| - } else { |
| - Advance(); |
| - token = Token::ILLEGAL; |
| - } |
| - } |
| - } while (token == Token::WHITESPACE); |
| - |
| - next_.end_pos = position_; |
| - next_.token = token; |
| -} |
| - |
| - |
| -Token::Value JsonParser::ScanJsonIdentifier(const char* text, |
| - Token::Value token) { |
| - while (*text != '\0') { |
| - if (c0_ != *text) return Token::ILLEGAL; |
| - Advance(); |
| - text++; |
| - } |
| - return token; |
| -} |
| - |
| - |
| -Token::Value JsonParser::ScanJsonNumber() { |
| +Handle<Object> JsonParser::ParseJsonNumber() { |
| bool negative = false; |
| - |
| + beg_pos_ = position_; |
| if (c0_ == '-') { |
| Advance(); |
| negative = true; |
| @@ -311,11 +220,11 @@ |
| Advance(); |
| // Prefix zero is only allowed if it's the only digit before |
| // a decimal point or exponent. |
| - if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; |
| + if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedToken(); |
| } else { |
| int i = 0; |
| int digits = 0; |
| - if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; |
| + if (c0_ < '1' || c0_ > '9') return ReportUnexpectedToken(); |
| do { |
| i = i * 10 + c0_ - '0'; |
| digits++; |
| @@ -323,12 +232,13 @@ |
| } while (c0_ >= '0' && c0_ <= '9'); |
| if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { |
| number_ = (negative ? -i : i); |
| - return Token::NUMBER; |
| + SkipWS(); |
| + return isolate()->factory()->NewNumber(number_); |
| } |
| } |
| if (c0_ == '.') { |
| Advance(); |
| - if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
| + if (c0_ < '0' || c0_ > '9') return ReportUnexpectedToken(); |
| do { |
| Advance(); |
| } while (c0_ >= '0' && c0_ <= '9'); |
| @@ -336,37 +246,38 @@ |
| if (AsciiAlphaToLower(c0_) == 'e') { |
| Advance(); |
| if (c0_ == '-' || c0_ == '+') Advance(); |
| - if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; |
| + if (c0_ < '0' || c0_ > '9') return ReportUnexpectedToken(); |
| do { |
| Advance(); |
| } while (c0_ >= '0' && c0_ <= '9'); |
| } |
| + int length = position_ - beg_pos_; |
| if (is_sequential_ascii_) { |
| - Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos, |
| - position_ - next_.beg_pos); |
| + Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length); |
| number_ = StringToDouble(isolate()->unicode_cache(), |
| chars, |
| NO_FLAGS, // Hex, octal or trailing junk. |
| OS::nan_value()); |
| } else { |
| - Vector<char> buffer = Vector<char>::New(position_ - next_.beg_pos); |
| - String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_); |
| + Vector<char> buffer = Vector<char>::New(length); |
| + String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_); |
| Vector<const char> result = |
| Vector<const char>(reinterpret_cast<const char*>(buffer.start()), |
| - position_ - next_.beg_pos); |
| + length); |
| number_ = StringToDouble(isolate()->unicode_cache(), |
| result, |
| NO_FLAGS, // Hex, octal or trailing junk. |
| 0.0); |
| buffer.Dispose(); |
| } |
| - return Token::NUMBER; |
| + SkipWS(); |
| + return isolate()->factory()->NewNumber(number_); |
| } |
| -Token::Value JsonParser::SlowScanJsonString() { |
| +Handle<Object> JsonParser::SlowScanJsonString() { |
| // The currently scanned ascii characters. |
| Handle<String> ascii(isolate()->factory()->NewSubString(source_, |
| - next_.beg_pos + 1, |
| + beg_pos_, |
| position_)); |
| Handle<String> two_byte = |
| isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, |
| @@ -392,7 +303,7 @@ |
| } |
| // Check for control character (0x00-0x1f) or unterminated string (<0). |
| - if (c0_ < 0x20) return Token::ILLEGAL; |
| + if (c0_ < 0x20) return ReportUnexpectedToken(); |
| if (c0_ != '\\') { |
| seq_two_byte->SeqTwoByteStringSet(count++, c0_); |
| Advance(); |
| @@ -425,7 +336,7 @@ |
| Advance(); |
| int digit = HexValue(c0_); |
| if (digit < 0) { |
| - return Token::ILLEGAL; |
| + return ReportUnexpectedToken(); |
| } |
| value = value * 16 + digit; |
| } |
| @@ -433,14 +344,14 @@ |
| break; |
| } |
| default: |
| - return Token::ILLEGAL; |
| + return ReportUnexpectedToken(); |
| } |
| Advance(); |
| } |
| } |
| // Advance past the last '"'. |
| ASSERT_EQ('"', c0_); |
| - Advance(); |
| + AdvanceWS(); |
| // Shrink the string to our length. |
| if (isolate()->heap()->InNewSpace(*seq_two_byte)) { |
| @@ -456,21 +367,19 @@ |
| seq_two_byte->set_length(count); |
| isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
| } |
| - string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte); |
| - return Token::STRING; |
| + return isolate()->factory()->NewConsString(ascii, seq_two_byte); |
| } |
| -Token::Value JsonParser::ScanJsonString() { |
| +template <bool is_symbol> |
| +Handle<Object> JsonParser::ScanJsonString() { |
| ASSERT_EQ('"', c0_); |
| - // Set string_val to null. If string_val is not set we assume an |
| - // ascii string begining at next_.beg_pos + 1 to next_.end_pos - 1. |
| - string_val_ = Handle<String>::null(); |
| Advance(); |
| + beg_pos_ = position_; |
| // Fast case for ascii only without escape characters. |
| while (c0_ != '"') { |
| // Check for control character (0x00-0x1f) or unterminated string (<0). |
| - if (c0_ < 0x20) return Token::ILLEGAL; |
| + if (c0_ < 0x20) return ReportUnexpectedToken(); |
| if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) { |
| Advance(); |
| } else { |
| @@ -478,36 +387,16 @@ |
| } |
| } |
| ASSERT_EQ('"', c0_); |
| + end_pos_ = position_; |
| // Advance past the last '"'. |
| - Advance(); |
| - return Token::STRING; |
| -} |
| - |
| -Handle<String> JsonParser::GetString() { |
| - return GetString(false); |
| -} |
| - |
| -Handle<String> JsonParser::GetSymbol() { |
| - Handle<String> result = GetString(true); |
| - if (result->IsSymbol()) return result; |
| - return isolate()->factory()->LookupSymbol(result); |
| -} |
| - |
| -Handle<String> JsonParser::GetString(bool hint_symbol) { |
| - // We have a non ascii string, return that. |
| - if (!string_val_.is_null()) return string_val_; |
| - |
| - if (is_sequential_ascii_ && hint_symbol) { |
| - Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_); |
| - // The current token includes the '"' in both ends. |
| - int length = current_.end_pos - current_.beg_pos - 2; |
| + AdvanceWS(); |
| + if (is_sequential_ascii_ && is_symbol) { |
| return isolate()->factory()->LookupAsciiSymbol(seq_source_, |
| - current_.beg_pos + 1, |
| - length); |
| + beg_pos_, |
| + end_pos_ - beg_pos_); |
| + } else { |
| + return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_); |
| } |
| - // The current token includes the '"' in both ends. |
| - return isolate()->factory()->NewSubString( |
| - source_, current_.beg_pos + 1, current_.end_pos - 1); |
| } |
| } } // namespace v8::internal |