Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | |
| 2 // Redistribution and use in source and binary forms, with or without | |
| 3 // modification, are permitted provided that the following conditions are | |
| 4 // met: | |
| 5 // | |
| 6 // * Redistributions of source code must retain the above copyright | |
| 7 // notice, this list of conditions and the following disclaimer. | |
| 8 // * Redistributions in binary form must reproduce the above | |
| 9 // copyright notice, this list of conditions and the following | |
| 10 // disclaimer in the documentation and/or other materials provided | |
| 11 // with the distribution. | |
| 12 // * Neither the name of Google Inc. nor the names of its | |
| 13 // contributors may be used to endorse or promote products derived | |
| 14 // from this software without specific prior written permission. | |
| 15 // | |
| 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 27 | |
| 28 #include "v8.h" | |
| 29 | |
| 30 #include "char-predicates-inl.h" | |
| 31 #include "conversions.h" | |
| 32 #include "json-parser.h" | |
| 33 #include "messages.h" | |
| 34 #include "spaces.h" | |
| 35 | |
| 36 namespace v8 { | |
| 37 namespace internal { | |
| 38 | |
| 39 | |
| 40 Handle<Object> JsonParser::ParseJson(Handle<String> source) { /* Parses source as one JSON value. Returns the parsed value; when parsing fails or a token other than EOS follows the value, throws a SyntaxError on the isolate and returns a null handle. */ | |
| 41 source_ = Handle<String>(source->TryFlattenGetString()); | |
| 42 source_length_ = source_->length() - 1; /* NOTE(review): presumably the index of the last character, for use by Advance() -- confirm */ | |
| 43 | |
| 44 // Optimized fast case where we only have ascii characters. | |
| 45 if (source_->IsSeqAsciiString()) { | |
| 46 is_sequential_ascii_ = true; | |
|
Lasse Reichstein
2011/05/19 07:27:40
This is not a property that is guaranteed to be pr
Rico
2011/05/23 18:18:12
As discussed offline, this is not the case right?
Lasse Reichstein
2011/05/24 07:28:34
That what is not the case? That strings can change
Rico
2011/05/24 08:49:24
OK, changed so that we reinitialize this variable
| |
| 47 seq_source_ = Handle<SeqAsciiString>::cast(source_); | |
| 48 } else { | |
| 49 is_sequential_ascii_ = false; | |
| 50 } | |
| 51 | |
| 52 // Set initial position right before the string. | |
| 53 position_ = -1; | |
| 54 // Advance to the first character (possibly EOS) | |
| 55 Advance(); | |
| 56 Next(); /* primes the lookahead: scans the first token into next_; ParseJsonValue() shifts it into current_ with its own Next() */ | |
| 57 Handle<Object> result = ParseJsonValue(); | |
| 58 if (result.is_null() || Next() != Token::EOS) { /* also an error when anything other than EOS follows the value */ | |
| 59 // Parse failed. Scanner's current token is the unexpected token. | |
| 60 Token::Value token = current_.token; | |
| 61 | |
| 62 const char* message; | |
| 63 const char* name_opt = NULL; /* stays NULL unless the default case supplies the token text */ | |
| 64 | |
| 65 switch (token) { | |
| 66 case Token::EOS: | |
| 67 message = "unexpected_eos"; | |
| 68 break; | |
| 69 case Token::NUMBER: | |
| 70 message = "unexpected_token_number"; | |
| 71 break; | |
| 72 case Token::STRING: | |
| 73 message = "unexpected_token_string"; | |
| 74 break; | |
| 75 case Token::IDENTIFIER: | |
| 76 case Token::FUTURE_RESERVED_WORD: | |
| 77 message = "unexpected_token_identifier"; | |
| 78 break; | |
| 79 default: | |
| 80 message = "unexpected_token"; | |
| 81 name_opt = Token::String(token); | |
| 82 ASSERT(name_opt != NULL); | |
| 83 break; | |
| 84 } | |
| 85 | |
| 86 Factory* factory = isolate()->factory(); | |
| 87 MessageLocation location(factory->NewScript(source), /* location spans the offending token, beg_pos to end_pos */ | |
| 88 current_.beg_pos, | |
| 89 current_.end_pos); | |
| 90 Handle<JSArray> array; /* arguments for the error: empty, or one element holding the token text */ | |
| 91 if (name_opt == NULL) { | |
| 92 array = factory->NewJSArray(0); | |
| 93 } else { | |
| 94 Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt)); | |
| 95 Handle<FixedArray> element = factory->NewFixedArray(1); | |
| 96 element->set(0, *name); | |
| 97 array = factory->NewJSArrayWithElements(element); | |
| 98 } | |
| 99 Handle<Object> result = factory->NewSyntaxError(message, array); /* note: shadows the outer result inside this error branch */ | |
| 100 isolate()->Throw(*result, &location); | |
| 101 return Handle<Object>::null(); | |
| 102 } | |
| 103 return result; | |
| 104 } | |
| 105 | |
| 106 | |
| 107 // Parse any JSON value. Consumes one token with Next() and dispatches on it; any token that cannot start a value yields the result of ReportUnexpectedToken(). | |
| 108 Handle<Object> JsonParser::ParseJsonValue() { | |
| 109 Token::Value token = Next(); | |
| 110 switch (token) { | |
| 111 case Token::STRING: | |
| 112 return GetString(false); /* false: a value string is not requested as a symbol */ | |
| 113 case Token::NUMBER: | |
| 114 return isolate()->factory()->NewNumber(number_); /* number_ is the value ScanJsonNumber() stored for this token */ | |
| 115 case Token::FALSE_LITERAL: | |
| 116 return isolate()->factory()->false_value(); | |
| 117 case Token::TRUE_LITERAL: | |
| 118 return isolate()->factory()->true_value(); | |
| 119 case Token::NULL_LITERAL: | |
| 120 return isolate()->factory()->null_value(); | |
| 121 case Token::LBRACE: | |
| 122 return ParseJsonObject(); | |
| 123 case Token::LBRACK: | |
| 124 return ParseJsonArray(); | |
| 125 default: | |
| 126 return ReportUnexpectedToken(); | |
| 127 } | |
| 128 } | |
| 129 | |
| 130 | |
| 131 // Parse a JSON object. Scanner must be right after '{' token. Returns the new object, a null handle when a member value fails to parse, or the result of ReportUnexpectedToken() on a malformed member list. | |
| 132 Handle<Object> JsonParser::ParseJsonObject() { | |
| 133 Handle<JSFunction> object_constructor( | |
| 134 isolate()->global_context()->object_function()); | |
| 135 Handle<JSObject> json_object = | |
| 136 isolate()->factory()->NewJSObject(object_constructor); | |
| 137 | |
| 138 if (Peek() == Token::RBRACE) { /* empty object: just consume the closing brace */ | |
| 139 Next(); | |
| 140 } else { | |
| 141 do { | |
| 142 if (Next() != Token::STRING) { /* every member must start with a string key */ | |
| 143 return ReportUnexpectedToken(); | |
| 144 } | |
| 145 Handle<String> key = GetString(true); /* true: request the key as a symbol */ | |
|
Lasse Reichstein
2011/05/19 07:27:40
How about having two functions: GetString() and Ge
Rico
2011/05/23 18:18:12
Done.
| |
| 146 if (Next() != Token::COLON) { | |
| 147 return ReportUnexpectedToken(); | |
| 148 } | |
| 149 | |
| 150 Handle<Object> value = ParseJsonValue(); | |
| 151 if (value.is_null()) return Handle<Object>::null(); | |
| 152 | |
| 153 uint32_t index; | |
| 154 if (key->AsArrayIndex(&index)) { /* keys that are array indices are stored as elements */ | |
| 155 SetOwnElement(json_object, index, value, kNonStrictMode); | |
| 156 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { /* a key equal to Proto_symbol() sets the prototype instead of adding a property */ | |
| 157 SetPrototype(json_object, value); | |
| 158 } else { | |
| 159 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); | |
| 160 } | |
| 161 } while (Next() == Token::COMMA); /* the token that ended the loop is left in current_ */ | |
| 162 if (current_.token != Token::RBRACE) { | |
| 163 return ReportUnexpectedToken(); | |
| 164 } | |
| 165 } | |
| 166 return json_object; | |
| 167 } | |
| 168 | |
| 169 // Parse a JSON array. Scanner must be right after '[' token. Returns the new array, a null handle when an element fails to parse, or the result of ReportUnexpectedToken() when the list is not closed by ']'. | |
| 170 Handle<Object> JsonParser::ParseJsonArray() { | |
| 171 ZoneScope zone_scope(DELETE_ON_EXIT); | |
| 172 ZoneList<Handle<Object> > elements(4); /* elements are collected here first, then copied into a FixedArray below */ | |
| 173 | |
| 174 Token::Value token = Peek(); | |
| 175 if (token == Token::RBRACK) { /* empty array: just consume the closing bracket */ | |
| 176 Next(); | |
| 177 } else { | |
| 178 do { | |
| 179 Handle<Object> element = ParseJsonValue(); | |
| 180 if (element.is_null()) return Handle<Object>::null(); | |
| 181 elements.Add(element); | |
| 182 token = Next(); | |
| 183 } while (token == Token::COMMA); | |
| 184 if (token != Token::RBRACK) { | |
| 185 return ReportUnexpectedToken(); | |
| 186 } | |
| 187 } | |
| 188 | |
| 189 // Allocate a fixed array with all the elements. | |
| 190 Handle<FixedArray> fast_elements = | |
| 191 isolate()->factory()->NewFixedArray(elements.length()); | |
| 192 | |
| 193 for (int i = 0, n = elements.length(); i < n; i++) { | |
| 194 fast_elements->set(i, *elements[i]); | |
| 195 } | |
| 196 | |
| 197 return isolate()->factory()->NewJSArrayWithElements(fast_elements); | |
| 198 } | |
| 199 | |
| 200 | |
| 201 Token::Value JsonParser::Next() { /* Makes the lookahead token current, scans the following token into next_, and returns the now-current token type. */ | |
| 202 current_ = next_; | |
| 203 ScanJson(); | |
| 204 return current_.token; | |
| 205 } | |
| 206 | |
| 207 void JsonParser::ScanJson() { /* Scans one token starting at c0_ and records its type and begin/end positions in next_; whitespace is skipped. */ | |
| 208 Token::Value token; | |
| 209 do { | |
| 210 // Remember the position of the next token (rewritten on every pass, so skipped whitespace is not part of the token) | |
| 211 next_.beg_pos = position_; | |
| 212 switch (c0_) { | |
| 213 case '\t': | |
| 214 case '\r': | |
| 215 case '\n': | |
| 216 case ' ': | |
| 217 Advance(); | |
| 218 token = Token::WHITESPACE; | |
| 219 break; | |
| 220 case '{': | |
| 221 Advance(); | |
| 222 token = Token::LBRACE; | |
| 223 break; | |
| 224 case '}': | |
| 225 Advance(); | |
| 226 token = Token::RBRACE; | |
| 227 break; | |
| 228 case '[': | |
| 229 Advance(); | |
| 230 token = Token::LBRACK; | |
| 231 break; | |
| 232 case ']': | |
| 233 Advance(); | |
| 234 token = Token::RBRACK; | |
| 235 break; | |
| 236 case ':': | |
| 237 Advance(); | |
| 238 token = Token::COLON; | |
| 239 break; | |
| 240 case ',': | |
| 241 Advance(); | |
| 242 token = Token::COMMA; | |
| 243 break; | |
| 244 case '"': /* no Advance() here: the string, number and literal scanners consume their own characters */ | |
| 245 token = ScanJsonString(); | |
| 246 break; | |
| 247 case '-': | |
| 248 case '0': | |
| 249 case '1': | |
| 250 case '2': | |
| 251 case '3': | |
| 252 case '4': | |
| 253 case '5': | |
| 254 case '6': | |
| 255 case '7': | |
| 256 case '8': | |
| 257 case '9': | |
| 258 token = ScanJsonNumber(); | |
| 259 break; | |
| 260 case 't': | |
| 261 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); | |
| 262 break; | |
| 263 case 'f': | |
| 264 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); | |
| 265 break; | |
| 266 case 'n': | |
| 267 token = ScanJsonIdentifier("null", Token::NULL_LITERAL); | |
| 268 break; | |
| 269 default: | |
| 270 if (c0_ < 0) { /* a negative c0_ is reported as end of input */ | |
| 271 Advance(); | |
| 272 token = Token::EOS; | |
| 273 } else { /* any other character cannot start a token */ | |
| 274 Advance(); | |
| 275 token = Token::ILLEGAL; | |
| 276 } | |
| 277 } | |
| 278 } while (token == Token::WHITESPACE); | |
| 279 | |
| 280 next_.end_pos = position_; | |
| 281 next_.token = token; | |
| 282 } | |
| 283 | |
| 284 | |
| 285 Token::Value JsonParser::ScanJsonIdentifier(const char* text, | |
| 286 Token::Value token) { /* Matches the input against text one character at a time. Returns token when all of text matched, ILLEGAL at the first mismatch; characters matched before a mismatch stay consumed. */ | |
| 287 while (*text != '\0') { | |
| 288 if (c0_ != *text) return Token::ILLEGAL; | |
| 289 Advance(); | |
| 290 text++; | |
| 291 } | |
| 292 return token; | |
| 293 } | |
| 294 | |
| 295 | |
| 296 Token::Value JsonParser::ScanJsonNumber() { /* Scans a number starting at c0_. On success stores the value in number_ and returns NUMBER; returns ILLEGAL when the text does not have the expected number shape. */ | |
| 297 bool negative = false; | |
| 298 | |
| 299 if (c0_ == '-') { | |
| 300 Advance(); | |
| 301 negative = true; | |
| 302 } | |
| 303 if (c0_ == '0') { | |
| 304 Advance(); | |
| 305 // Prefix zero is only allowed if it's the only digit before | |
| 306 // a decimal point or exponent. | |
| 307 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | |
| 308 } else { | |
| 309 int i = 0; /* running value of the integer part */ | |
| 310 int digits = 0; /* number of integer digits consumed */ | |
| 311 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | |
| 312 do { | |
| 313 i = i * 10 + c0_ - '0'; | |
| 314 digits++; | |
| 315 Advance(); | |
| 316 } while (c0_ >= '0' && c0_ <= '9'); | |
| 317 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { /* fast path: plain integer with fewer than 10 digits, no fraction or exponent, is taken directly from i */ | |
| 318 number_ = (negative ? -i : i); | |
| 319 return Token::NUMBER; | |
| 320 } | |
| 321 } | |
| 322 if (c0_ == '.') { | |
| 323 Advance(); | |
| 324 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; /* at least one digit must follow the decimal point */ | |
| 325 do { | |
| 326 Advance(); | |
| 327 } while (c0_ >= '0' && c0_ <= '9'); | |
| 328 } | |
| 329 if (AsciiAlphaToLower(c0_) == 'e') { | |
| 330 Advance(); | |
| 331 if (c0_ == '-' || c0_ == '+') Advance(); | |
| 332 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; /* at least one digit must follow the exponent marker and optional sign */ | |
| 333 do { | |
| 334 Advance(); | |
| 335 } while (c0_ >= '0' && c0_ <= '9'); | |
| 336 } | |
| 337 if (is_sequential_ascii_) { /* general path: convert the scanned text, read directly from the sequential ascii source */ | |
| 338 Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos, | |
| 339 position_ - next_.beg_pos); | |
| 340 number_ = StringToDouble(isolate()->unicode_cache(), | |
| 341 chars, | |
| 342 NO_FLAGS, // Hex, octal or trailing junk. | |
| 343 OS::nan_value()); | |
| 344 } else { | |
|
Lasse Reichstein
2011/05/19 07:27:40
Ick. Why create a heap string?
Just make a buffer
Rico
2011/05/23 18:18:12
Fast atoi? this is a double.
I added a conversion
| |
| 345 Handle<String> value = isolate()->factory()->NewSubString( /* otherwise convert a substring object covering the scanned text */ | |
| 346 source_, next_.beg_pos, position_); | |
| 347 number_ = StringToDouble(isolate()->unicode_cache(), | |
| 348 *value, | |
| 349 NO_FLAGS, // Hex, octal or trailing junk. | |
| 350 OS::nan_value()); | |
| 351 } | |
| 352 return Token::NUMBER; | |
| 353 } | |
| 354 | |
| 355 Token::Value JsonParser::SlowScanJsonString() { /* Continues a string scan handed over by ScanJsonString(): decodes the rest of the string, escapes included, into a two-byte buffer and stores prefix plus remainder in string_val_. Returns STRING, or ILLEGAL on a bad character or escape. */ | |
| 356 // The currently scanned ascii characters. | |
| 357 Handle<String> ascii(isolate()->factory()->NewSubString(source_, | |
| 358 next_.beg_pos + 1, | |
| 359 position_)); | |
| 360 Handle<String> two_byte = | |
| 361 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, | |
| 362 NOT_TENURED); | |
| 363 ASSERT(two_byte->IsSeqTwoByteString()); | |
|
Lasse Reichstein
2011/05/19 07:27:40
Odd assert. How could that not happen?
Consider ch
Rico
2011/05/23 18:18:12
Done.
| |
| 364 Handle<SeqTwoByteString> seq_two_byte = | |
| 365 Handle<SeqTwoByteString>::cast(two_byte); | |
| 366 | |
| 367 int allocation_count = 1; /* buffer capacity is allocation_count * kInitialSpecialStringSize */ | |
| 368 int count = 0; /* characters written to the buffer so far */ | |
| 369 | |
| 370 while (c0_ != '"') { | |
| 371 // Create new seq string: when the buffer is full, allocate one that is kInitialSpecialStringSize larger and copy the count characters written so far | |
| 372 if (count >= kInitialSpecialStringSize * allocation_count) { | |
| 373 allocation_count++; | |
| 374 int new_size = allocation_count * kInitialSpecialStringSize; | |
| 375 Handle<String> new_two_byte = | |
| 376 isolate()->factory()->NewRawTwoByteString(new_size, | |
| 377 NOT_TENURED); | |
| 378 uc16* char_start = | |
| 379 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); | |
| 380 String::WriteToFlat(*seq_two_byte, char_start, 0, count); | |
| 381 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); | |
| 382 } | |
| 383 | |
| 384 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
| 385 if (c0_ < 0x20) return Token::ILLEGAL; | |
| 386 if (c0_ != '\\') { | |
| 387 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | |
| 388 Advance(); | |
| 389 } else { | |
| 390 Advance(); /* skip the backslash; c0_ is now the escape selector */ | |
| 391 switch (c0_) { | |
| 392 case '"': | |
| 393 case '\\': | |
| 394 case '/': | |
| 395 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | |
| 396 break; | |
| 397 case 'b': | |
| 398 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); | |
| 399 break; | |
| 400 case 'f': | |
| 401 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); | |
| 402 break; | |
| 403 case 'n': | |
| 404 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); | |
| 405 break; | |
| 406 case 'r': | |
| 407 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); | |
| 408 break; | |
| 409 case 't': | |
| 410 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); | |
| 411 break; | |
| 412 case 'u': { /* exactly four hex digits form one code unit */ | |
| 413 uc32 value = 0; | |
| 414 for (int i = 0; i < 4; i++) { | |
| 415 Advance(); | |
| 416 int digit = HexValue(c0_); | |
| 417 if (digit < 0) { | |
| 418 return Token::ILLEGAL; | |
| 419 } | |
| 420 value = value * 16 + digit; | |
| 421 } | |
| 422 seq_two_byte->SeqTwoByteStringSet(count++, value); | |
| 423 break; | |
| 424 } | |
| 425 default: | |
| 426 return Token::ILLEGAL; | |
| 427 } | |
| 428 Advance(); /* move past the last character of the escape sequence */ | |
| 429 } | |
| 430 } | |
| 431 // Advance past the last '"'. | |
| 432 ASSERT_EQ('"', c0_); | |
| 433 Advance(); | |
| 434 | |
| 435 // Shrink the string to our length. | |
| 436 isolate()->heap()-> | |
| 437 new_space()-> | |
| 438 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte, | |
| 439 count); | |
|
Lasse Reichstein
2011/05/19 07:27:40
If the ascii string is short (e.g., it's shorter t
Rico
2011/05/23 18:18:12
Indeed, this is one of those optimizations for the
| |
| 440 string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte); /* result: the already-scanned prefix followed by the decoded remainder */ | |
| 441 return Token::STRING; | |
| 442 } | |
| 443 | |
| 444 | |
| 445 Token::Value JsonParser::ScanJsonString() { /* Scans a string token starting at the opening quote. Stays on the fast path while characters need no decoding; hands over to SlowScanJsonString() otherwise. Returns STRING or ILLEGAL. */ | |
| 446 ASSERT_EQ('"', c0_); | |
| 447 // Set string_val to null. If string_val is not set we assume an | |
| 448 // ascii string beginning at next_.beg_pos + 1 to next_.end_pos - 1. | |
| 449 string_val_ = Handle<String>::null(); | |
| 450 Advance(); | |
| 451 // Fast case for ascii only without escape characters. | |
| 452 while (c0_ != '"') { | |
| 453 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
| 454 if (c0_ < 0x20) return Token::ILLEGAL; | |
| 455 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) { /* NOTE(review): the strict less-than also sends a character equal to kMaxAsciiCharCode to the slow path -- confirm this is intended */ | |
| 456 Advance(); | |
| 457 } else { | |
| 458 return SlowScanJsonString(); | |
|
Lasse Reichstein
2011/05/19 07:27:40
The SlowScanJsonString creates a TwoByte string. I
Rico
2011/05/23 18:18:12
Yes
| |
| 459 } | |
| 460 } | |
| 461 ASSERT_EQ('"', c0_); | |
| 462 // Advance past the last '"'. | |
| 463 Advance(); | |
| 464 return Token::STRING; | |
| 465 } | |
| 466 | |
| 467 | |
| 468 Handle<String> JsonParser::GetString(bool is_symbol) { /* Returns the string for the current STRING token: string_val_ when the slow scanner produced one, otherwise the source characters between the quotes. With is_symbol set and a sequential ascii source the result comes from LookupAsciiSymbol(); the other two paths return the string without a symbol lookup. */ | |
| 469 // We have a non ascii string, return that. | |
| 470 if (!string_val_.is_null()) return string_val_; | |
|
Lasse Reichstein
2011/05/19 07:27:40
That does not make it a symbol if it isn't already
Rico
2011/05/23 18:18:12
renaming hint_symbol
| |
| 471 | |
| 472 if (is_sequential_ascii_ && is_symbol) { /* seq_source_ already holds source_ cast to SeqAsciiString, so no local cast is needed */ | |
| 474 // The current token includes the '"' in both ends. | |
| 475 int length = current_.end_pos - current_.beg_pos - 2; | |
| 476 return isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
| 477 current_.beg_pos + 1, | |
| 478 length); | |
| 479 } | |
| 480 // The current token includes the '"' in both ends. | |
| 481 return isolate()->factory()->NewSubString( | |
|
Lasse Reichstein
2011/05/19 07:27:40
This also doesn't make it a symbol even if is_symb
Rico
2011/05/23 18:18:12
Done.
| |
| 482 source_, current_.beg_pos + 1, current_.end_pos - 1); | |
| 483 } | |
| 484 | |
| 485 } } // namespace v8::internal | |
| OLD | NEW |