OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 10 matching lines...) Expand all Loading... |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 #ifndef V8_JSON_PARSER_H_ | 28 #ifndef V8_JSON_PARSER_H_ |
29 #define V8_JSON_PARSER_H_ | 29 #define V8_JSON_PARSER_H_ |
30 | 30 |
| 31 #include "v8.h" |
| 32 |
| 33 #include "char-predicates-inl.h" |
| 34 #include "conversions.h" |
| 35 #include "messages.h" |
| 36 #include "spaces-inl.h" |
31 #include "token.h" | 37 #include "token.h" |
32 | 38 |
33 namespace v8 { | 39 namespace v8 { |
34 namespace internal { | 40 namespace internal { |
35 | 41 |
36 // A simple json parser. | 42 // A simple json parser. |
| 43 template <bool seq_ascii> |
37 class JsonParser BASE_EMBEDDED { | 44 class JsonParser BASE_EMBEDDED { |
38 public: | 45 public: |
39 static Handle<Object> Parse(Handle<String> source) { | 46 static Handle<Object> Parse(Handle<String> source) { |
40 return JsonParser().ParseJson(source); | 47 return JsonParser().ParseJson(source); |
41 } | 48 } |
42 | 49 |
43 static const int kEndOfString = -1; | 50 static const int kEndOfString = -1; |
44 | 51 |
45 private: | 52 private: |
46 // Parse a string containing a single JSON value. | 53 // Parse a string containing a single JSON value. |
47 Handle<Object> ParseJson(Handle<String> source); | 54 Handle<Object> ParseJson(Handle<String> source); |
48 | 55 |
49 inline void Advance() { | 56 inline void Advance() { |
50 position_++; | 57 position_++; |
51 if (position_ > source_length_) { | 58 if (position_ > source_length_) { |
52 c0_ = kEndOfString; | 59 c0_ = kEndOfString; |
53 } else if (is_sequential_ascii_) { | 60 } else if (seq_ascii) { |
54 c0_ = seq_source_->SeqAsciiStringGet(position_); | 61 c0_ = seq_source_->SeqAsciiStringGet(position_); |
55 } else { | 62 } else { |
56 c0_ = source_->Get(position_); | 63 c0_ = source_->Get(position_); |
57 } | 64 } |
58 } | 65 } |
59 | 66 |
60 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
61 // section 15.12.1.1. The only allowed whitespace characters between tokens | 68 // section 15.12.1.1. The only allowed whitespace characters between tokens |
62 // are tab, carriage-return, newline and space. | 69 // are tab, carriage-return, newline and space. |
63 | 70 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
142 inline Isolate* isolate() { return isolate_; } | 149 inline Isolate* isolate() { return isolate_; } |
143 | 150 |
144 static const int kInitialSpecialStringSize = 1024; | 151 static const int kInitialSpecialStringSize = 1024; |
145 | 152 |
146 | 153 |
147 private: | 154 private: |
148 Handle<String> source_; | 155 Handle<String> source_; |
149 int source_length_; | 156 int source_length_; |
150 Handle<SeqAsciiString> seq_source_; | 157 Handle<SeqAsciiString> seq_source_; |
151 | 158 |
152 bool is_sequential_ascii_; | |
153 // begin and end position of scanned string or number | 159 // begin and end position of scanned string or number |
154 int beg_pos_; | 160 int beg_pos_; |
155 int end_pos_; | 161 int end_pos_; |
156 | 162 |
157 Isolate* isolate_; | 163 Isolate* isolate_; |
158 uc32 c0_; | 164 uc32 c0_; |
159 int position_; | 165 int position_; |
160 | 166 |
161 double number_; | 167 double number_; |
162 }; | 168 }; |
163 | 169 |
| 170 template <bool seq_ascii> |
| 171 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { |
| 172 isolate_ = source->map()->isolate(); |
| 173 source_ = Handle<String>(source->TryFlattenGetString()); |
| 174 source_length_ = source_->length() - 1; |
| 175 |
| 176 // Optimized fast case where we only have ascii characters. |
| 177 if (seq_ascii) { |
| 178 seq_source_ = Handle<SeqAsciiString>::cast(source_); |
| 179 } |
| 180 |
| 181 // Set initial position right before the string. |
| 182 position_ = -1; |
| 183 // Advance to the first character (posibly EOS) |
| 184 AdvanceSkipWhitespace(); |
| 185 Handle<Object> result = ParseJsonValue(); |
| 186 if (result.is_null() || c0_ != kEndOfString) { |
| 187 // Parse failed. Current character is the unexpected token. |
| 188 |
| 189 const char* message; |
| 190 Factory* factory = isolate()->factory(); |
| 191 Handle<JSArray> array; |
| 192 |
| 193 switch (c0_) { |
| 194 case kEndOfString: |
| 195 message = "unexpected_eos"; |
| 196 array = factory->NewJSArray(0); |
| 197 break; |
| 198 case '-': |
| 199 case '0': |
| 200 case '1': |
| 201 case '2': |
| 202 case '3': |
| 203 case '4': |
| 204 case '5': |
| 205 case '6': |
| 206 case '7': |
| 207 case '8': |
| 208 case '9': |
| 209 message = "unexpected_token_number"; |
| 210 array = factory->NewJSArray(0); |
| 211 break; |
| 212 case '"': |
| 213 message = "unexpected_token_string"; |
| 214 array = factory->NewJSArray(0); |
| 215 break; |
| 216 default: |
| 217 message = "unexpected_token"; |
| 218 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_); |
| 219 Handle<FixedArray> element = factory->NewFixedArray(1); |
| 220 element->set(0, *name); |
| 221 array = factory->NewJSArrayWithElements(element); |
| 222 break; |
| 223 } |
| 224 |
| 225 MessageLocation location(factory->NewScript(source), |
| 226 position_, |
| 227 position_ + 1); |
| 228 Handle<Object> result = factory->NewSyntaxError(message, array); |
| 229 isolate()->Throw(*result, &location); |
| 230 return Handle<Object>::null(); |
| 231 } |
| 232 return result; |
| 233 } |
| 234 |
| 235 |
| 236 // Parse any JSON value. |
| 237 template <bool seq_ascii> |
| 238 Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() { |
| 239 switch (c0_) { |
| 240 case '"': |
| 241 return ParseJsonString(); |
| 242 case '-': |
| 243 case '0': |
| 244 case '1': |
| 245 case '2': |
| 246 case '3': |
| 247 case '4': |
| 248 case '5': |
| 249 case '6': |
| 250 case '7': |
| 251 case '8': |
| 252 case '9': |
| 253 return ParseJsonNumber(); |
| 254 case 'f': |
| 255 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && |
| 256 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { |
| 257 AdvanceSkipWhitespace(); |
| 258 return isolate()->factory()->false_value(); |
| 259 } else { |
| 260 return ReportUnexpectedCharacter(); |
| 261 } |
| 262 case 't': |
| 263 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && |
| 264 AdvanceGetChar() == 'e') { |
| 265 AdvanceSkipWhitespace(); |
| 266 return isolate()->factory()->true_value(); |
| 267 } else { |
| 268 return ReportUnexpectedCharacter(); |
| 269 } |
| 270 case 'n': |
| 271 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && |
| 272 AdvanceGetChar() == 'l') { |
| 273 AdvanceSkipWhitespace(); |
| 274 return isolate()->factory()->null_value(); |
| 275 } else { |
| 276 return ReportUnexpectedCharacter(); |
| 277 } |
| 278 case '{': |
| 279 return ParseJsonObject(); |
| 280 case '[': |
| 281 return ParseJsonArray(); |
| 282 default: |
| 283 return ReportUnexpectedCharacter(); |
| 284 } |
| 285 } |
| 286 |
| 287 |
| 288 // Parse a JSON object. Position must be right at '{'. |
| 289 template <bool seq_ascii> |
| 290 Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() { |
| 291 Handle<JSFunction> object_constructor( |
| 292 isolate()->global_context()->object_function()); |
| 293 Handle<JSObject> json_object = |
| 294 isolate()->factory()->NewJSObject(object_constructor); |
| 295 ASSERT_EQ(c0_, '{'); |
| 296 |
| 297 AdvanceSkipWhitespace(); |
| 298 if (c0_ != '}') { |
| 299 do { |
| 300 if (c0_ != '"') return ReportUnexpectedCharacter(); |
| 301 Handle<String> key = ParseJsonSymbol(); |
| 302 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); |
| 303 AdvanceSkipWhitespace(); |
| 304 Handle<Object> value = ParseJsonValue(); |
| 305 if (value.is_null()) return ReportUnexpectedCharacter(); |
| 306 |
| 307 uint32_t index; |
| 308 if (key->AsArrayIndex(&index)) { |
| 309 SetOwnElement(json_object, index, value, kNonStrictMode); |
| 310 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { |
| 311 SetPrototype(json_object, value); |
| 312 } else { |
| 313 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); |
| 314 } |
| 315 } while (MatchSkipWhiteSpace(',')); |
| 316 if (c0_ != '}') { |
| 317 return ReportUnexpectedCharacter(); |
| 318 } |
| 319 } |
| 320 AdvanceSkipWhitespace(); |
| 321 return json_object; |
| 322 } |
| 323 |
| 324 // Parse a JSON array. Position must be right at '['. |
| 325 template <bool seq_ascii> |
| 326 Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() { |
| 327 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT); |
| 328 ZoneList<Handle<Object> > elements(4); |
| 329 ASSERT_EQ(c0_, '['); |
| 330 |
| 331 AdvanceSkipWhitespace(); |
| 332 if (c0_ != ']') { |
| 333 do { |
| 334 Handle<Object> element = ParseJsonValue(); |
| 335 if (element.is_null()) return ReportUnexpectedCharacter(); |
| 336 elements.Add(element); |
| 337 } while (MatchSkipWhiteSpace(',')); |
| 338 if (c0_ != ']') { |
| 339 return ReportUnexpectedCharacter(); |
| 340 } |
| 341 } |
| 342 AdvanceSkipWhitespace(); |
| 343 // Allocate a fixed array with all the elements. |
| 344 Handle<FixedArray> fast_elements = |
| 345 isolate()->factory()->NewFixedArray(elements.length()); |
| 346 for (int i = 0, n = elements.length(); i < n; i++) { |
| 347 fast_elements->set(i, *elements[i]); |
| 348 } |
| 349 return isolate()->factory()->NewJSArrayWithElements(fast_elements); |
| 350 } |
| 351 |
| 352 |
| 353 template <bool seq_ascii> |
| 354 Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() { |
| 355 bool negative = false; |
| 356 beg_pos_ = position_; |
| 357 if (c0_ == '-') { |
| 358 Advance(); |
| 359 negative = true; |
| 360 } |
| 361 if (c0_ == '0') { |
| 362 Advance(); |
| 363 // Prefix zero is only allowed if it's the only digit before |
| 364 // a decimal point or exponent. |
| 365 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter(); |
| 366 } else { |
| 367 int i = 0; |
| 368 int digits = 0; |
| 369 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); |
| 370 do { |
| 371 i = i * 10 + c0_ - '0'; |
| 372 digits++; |
| 373 Advance(); |
| 374 } while (c0_ >= '0' && c0_ <= '9'); |
| 375 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { |
| 376 number_ = (negative ? -i : i); |
| 377 SkipWhitespace(); |
| 378 return isolate()->factory()->NewNumber(number_); |
| 379 } |
| 380 } |
| 381 if (c0_ == '.') { |
| 382 Advance(); |
| 383 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); |
| 384 do { |
| 385 Advance(); |
| 386 } while (c0_ >= '0' && c0_ <= '9'); |
| 387 } |
| 388 if (AsciiAlphaToLower(c0_) == 'e') { |
| 389 Advance(); |
| 390 if (c0_ == '-' || c0_ == '+') Advance(); |
| 391 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); |
| 392 do { |
| 393 Advance(); |
| 394 } while (c0_ >= '0' && c0_ <= '9'); |
| 395 } |
| 396 int length = position_ - beg_pos_; |
| 397 if (seq_ascii) { |
| 398 Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length); |
| 399 number_ = StringToDouble(isolate()->unicode_cache(), |
| 400 chars, |
| 401 NO_FLAGS, // Hex, octal or trailing junk. |
| 402 OS::nan_value()); |
| 403 } else { |
| 404 Vector<char> buffer = Vector<char>::New(length); |
| 405 String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_); |
| 406 Vector<const char> result = |
| 407 Vector<const char>(reinterpret_cast<const char*>(buffer.start()), |
| 408 length); |
| 409 number_ = StringToDouble(isolate()->unicode_cache(), |
| 410 result, |
| 411 NO_FLAGS, // Hex, octal or trailing junk. |
| 412 0.0); |
| 413 buffer.Dispose(); |
| 414 } |
| 415 SkipWhitespace(); |
| 416 return isolate()->factory()->NewNumber(number_); |
| 417 } |
| 418 |
| 419 template <bool seq_ascii> |
| 420 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString() { |
| 421 // The currently scanned ascii characters. |
| 422 Handle<String> ascii(isolate()->factory()->NewSubString(source_, |
| 423 beg_pos_, |
| 424 position_)); |
| 425 Handle<String> two_byte = |
| 426 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, |
| 427 NOT_TENURED); |
| 428 Handle<SeqTwoByteString> seq_two_byte = |
| 429 Handle<SeqTwoByteString>::cast(two_byte); |
| 430 |
| 431 int allocation_count = 1; |
| 432 int count = 0; |
| 433 |
| 434 while (c0_ != '"') { |
| 435 // Create new seq string |
| 436 if (count >= kInitialSpecialStringSize * allocation_count) { |
| 437 allocation_count = allocation_count * 2; |
| 438 int new_size = allocation_count * kInitialSpecialStringSize; |
| 439 Handle<String> new_two_byte = |
| 440 isolate()->factory()->NewRawTwoByteString(new_size, |
| 441 NOT_TENURED); |
| 442 uc16* char_start = |
| 443 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); |
| 444 String::WriteToFlat(*seq_two_byte, char_start, 0, count); |
| 445 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); |
| 446 } |
| 447 |
| 448 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 449 if (c0_ < 0x20) return Handle<String>::null(); |
| 450 if (c0_ != '\\') { |
| 451 seq_two_byte->SeqTwoByteStringSet(count++, c0_); |
| 452 Advance(); |
| 453 } else { |
| 454 Advance(); |
| 455 switch (c0_) { |
| 456 case '"': |
| 457 case '\\': |
| 458 case '/': |
| 459 seq_two_byte->SeqTwoByteStringSet(count++, c0_); |
| 460 break; |
| 461 case 'b': |
| 462 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); |
| 463 break; |
| 464 case 'f': |
| 465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); |
| 466 break; |
| 467 case 'n': |
| 468 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); |
| 469 break; |
| 470 case 'r': |
| 471 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); |
| 472 break; |
| 473 case 't': |
| 474 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); |
| 475 break; |
| 476 case 'u': { |
| 477 uc32 value = 0; |
| 478 for (int i = 0; i < 4; i++) { |
| 479 Advance(); |
| 480 int digit = HexValue(c0_); |
| 481 if (digit < 0) { |
| 482 return Handle<String>::null(); |
| 483 } |
| 484 value = value * 16 + digit; |
| 485 } |
| 486 seq_two_byte->SeqTwoByteStringSet(count++, value); |
| 487 break; |
| 488 } |
| 489 default: |
| 490 return Handle<String>::null(); |
| 491 } |
| 492 Advance(); |
| 493 } |
| 494 } |
| 495 // Advance past the last '"'. |
| 496 ASSERT_EQ('"', c0_); |
| 497 AdvanceSkipWhitespace(); |
| 498 |
| 499 // Shrink the the string to our length. |
| 500 if (isolate()->heap()->InNewSpace(*seq_two_byte)) { |
| 501 isolate()->heap()->new_space()-> |
| 502 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( |
| 503 *seq_two_byte, count); |
| 504 } else { |
| 505 int string_size = SeqTwoByteString::SizeFor(count); |
| 506 int allocated_string_size = |
| 507 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count); |
| 508 int delta = allocated_string_size - string_size; |
| 509 Address start_filler_object = seq_two_byte->address() + string_size; |
| 510 seq_two_byte->set_length(count); |
| 511 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
| 512 } |
| 513 return isolate()->factory()->NewConsString(ascii, seq_two_byte); |
| 514 } |
| 515 |
| 516 template <bool seq_ascii> |
| 517 template <bool is_symbol> |
| 518 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
| 519 ASSERT_EQ('"', c0_); |
| 520 Advance(); |
| 521 beg_pos_ = position_; |
| 522 // Fast case for ascii only without escape characters. |
| 523 while (c0_ != '"') { |
| 524 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 525 if (c0_ < 0x20) return Handle<String>::null(); |
| 526 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { |
| 527 Advance(); |
| 528 } else { |
| 529 return this->SlowScanJsonString(); |
| 530 } |
| 531 } |
| 532 ASSERT_EQ('"', c0_); |
| 533 end_pos_ = position_; |
| 534 // Advance past the last '"'. |
| 535 AdvanceSkipWhitespace(); |
| 536 if (seq_ascii && is_symbol) { |
| 537 return isolate()->factory()->LookupAsciiSymbol(seq_source_, |
| 538 beg_pos_, |
| 539 end_pos_ - beg_pos_); |
| 540 } else { |
| 541 return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_); |
| 542 } |
| 543 } |
| 544 |
164 } } // namespace v8::internal | 545 } } // namespace v8::internal |
165 | 546 |
166 #endif // V8_JSON_PARSER_H_ | 547 #endif // V8_JSON_PARSER_H_ |
OLD | NEW |