| OLD | NEW |
| (Empty) |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef V8_JSON_PARSER_H_ | |
| 6 #define V8_JSON_PARSER_H_ | |
| 7 | |
| 8 #include "src/char-predicates.h" | |
| 9 #include "src/conversions.h" | |
| 10 #include "src/debug/debug.h" | |
| 11 #include "src/factory.h" | |
| 12 #include "src/messages.h" | |
| 13 #include "src/scanner.h" | |
| 14 #include "src/token.h" | |
| 15 #include "src/transitions.h" | |
| 16 #include "src/types.h" | |
| 17 | |
| 18 namespace v8 { | |
| 19 namespace internal { | |
| 20 | |
// Outcome of JsonParser::ParseElement(), which is tried when an object key
// starts with a decimal digit.
enum ParseElementResult {
  // The key was an array index and its value was parsed and stored.
  kElementFound,
  // The key is not a usable index; the caller re-parses it as a property.
  kElementNotFound,
  // Parsing the value failed (ParseJsonValue() returned a null handle).
  kNullHandle
};
| 22 | |
| 23 | |
| 24 // A simple json parser. | |
| 25 template <bool seq_one_byte> | |
| 26 class JsonParser BASE_EMBEDDED { | |
| 27 public: | |
| 28 MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) { | |
| 29 return JsonParser(source).ParseJson(); | |
| 30 } | |
| 31 | |
| 32 static const int kEndOfString = -1; | |
| 33 | |
| 34 private: | |
| 35 explicit JsonParser(Handle<String> source) | |
| 36 : source_(source), | |
| 37 source_length_(source->length()), | |
| 38 isolate_(source->map()->GetHeap()->isolate()), | |
| 39 factory_(isolate_->factory()), | |
| 40 object_constructor_(isolate_->native_context()->object_function(), | |
| 41 isolate_), | |
| 42 position_(-1) { | |
| 43 source_ = String::Flatten(source_); | |
| 44 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED; | |
| 45 | |
| 46 // Optimized fast case where we only have Latin1 characters. | |
| 47 if (seq_one_byte) { | |
| 48 seq_source_ = Handle<SeqOneByteString>::cast(source_); | |
| 49 } | |
| 50 } | |
| 51 | |
| 52 // Parse a string containing a single JSON value. | |
| 53 MaybeHandle<Object> ParseJson(); | |
| 54 | |
| 55 inline void Advance() { | |
| 56 position_++; | |
| 57 if (position_ >= source_length_) { | |
| 58 c0_ = kEndOfString; | |
| 59 } else if (seq_one_byte) { | |
| 60 c0_ = seq_source_->SeqOneByteStringGet(position_); | |
| 61 } else { | |
| 62 c0_ = source_->Get(position_); | |
| 63 } | |
| 64 } | |
| 65 | |
| 66 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | |
| 67 // section 15.12.1.1. The only allowed whitespace characters between tokens | |
| 68 // are tab, carriage-return, newline and space. | |
| 69 | |
| 70 inline void AdvanceSkipWhitespace() { | |
| 71 do { | |
| 72 Advance(); | |
| 73 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r'); | |
| 74 } | |
| 75 | |
| 76 inline void SkipWhitespace() { | |
| 77 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') { | |
| 78 Advance(); | |
| 79 } | |
| 80 } | |
| 81 | |
| 82 inline uc32 AdvanceGetChar() { | |
| 83 Advance(); | |
| 84 return c0_; | |
| 85 } | |
| 86 | |
| 87 // Checks that current charater is c. | |
| 88 // If so, then consume c and skip whitespace. | |
| 89 inline bool MatchSkipWhiteSpace(uc32 c) { | |
| 90 if (c0_ == c) { | |
| 91 AdvanceSkipWhitespace(); | |
| 92 return true; | |
| 93 } | |
| 94 return false; | |
| 95 } | |
| 96 | |
| 97 // A JSON string (production JSONString) is subset of valid JavaScript string | |
| 98 // literals. The string must only be double-quoted (not single-quoted), and | |
| 99 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | |
| 100 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | |
| 101 Handle<String> ParseJsonString() { | |
| 102 return ScanJsonString<false>(); | |
| 103 } | |
| 104 | |
| 105 bool ParseJsonString(Handle<String> expected) { | |
| 106 int length = expected->length(); | |
| 107 if (source_->length() - position_ - 1 > length) { | |
| 108 DisallowHeapAllocation no_gc; | |
| 109 String::FlatContent content = expected->GetFlatContent(); | |
| 110 if (content.IsOneByte()) { | |
| 111 DCHECK_EQ('"', c0_); | |
| 112 const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1; | |
| 113 const uint8_t* expected_chars = content.ToOneByteVector().start(); | |
| 114 for (int i = 0; i < length; i++) { | |
| 115 uint8_t c0 = input_chars[i]; | |
| 116 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') { | |
| 117 return false; | |
| 118 } | |
| 119 } | |
| 120 if (input_chars[length] == '"') { | |
| 121 position_ = position_ + length + 1; | |
| 122 AdvanceSkipWhitespace(); | |
| 123 return true; | |
| 124 } | |
| 125 } | |
| 126 } | |
| 127 return false; | |
| 128 } | |
| 129 | |
| 130 Handle<String> ParseJsonInternalizedString() { | |
| 131 return ScanJsonString<true>(); | |
| 132 } | |
| 133 | |
| 134 template <bool is_internalized> | |
| 135 Handle<String> ScanJsonString(); | |
| 136 // Creates a new string and copies prefix[start..end] into the beginning | |
| 137 // of it. Then scans the rest of the string, adding characters after the | |
| 138 // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char. | |
| 139 template <typename StringType, typename SinkChar> | |
| 140 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); | |
| 141 | |
| 142 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | |
| 143 // decimal number literals. | |
| 144 // It includes an optional minus sign, must have at least one | |
| 145 // digit before and after a decimal point, may not have prefixed zeros (unless | |
| 146 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | |
| 147 // Hexadecimal and octal numbers are not allowed. | |
| 148 Handle<Object> ParseJsonNumber(); | |
| 149 | |
| 150 // Parse a single JSON value from input (grammar production JSONValue). | |
| 151 // A JSON value is either a (double-quoted) string literal, a number literal, | |
| 152 // one of "true", "false", or "null", or an object or array literal. | |
| 153 Handle<Object> ParseJsonValue(); | |
| 154 | |
| 155 // Parse a JSON object literal (grammar production JSONObject). | |
| 156 // An object literal is a squiggly-braced and comma separated sequence | |
| 157 // (possibly empty) of key/value pairs, where the key is a JSON string | |
| 158 // literal, the value is a JSON value, and the two are separated by a colon. | |
| 159 // A JSON array doesn't allow numbers and identifiers as keys, like a | |
| 160 // JavaScript array. | |
| 161 Handle<Object> ParseJsonObject(); | |
| 162 | |
| 163 // Helper for ParseJsonObject. Parses the form "123": obj, which is recorded | |
| 164 // as an element, not a property. | |
| 165 ParseElementResult ParseElement(Handle<JSObject> json_object); | |
| 166 | |
| 167 // Parses a JSON array literal (grammar production JSONArray). An array | |
| 168 // literal is a square-bracketed and comma separated sequence (possibly empty) | |
| 169 // of JSON values. | |
| 170 // A JSON array doesn't allow leaving out values from the sequence, nor does | |
| 171 // it allow a terminal comma, like a JavaScript array does. | |
| 172 Handle<Object> ParseJsonArray(); | |
| 173 | |
| 174 | |
| 175 // Mark that a parsing error has happened at the current token, and | |
| 176 // return a null handle. Primarily for readability. | |
| 177 inline Handle<Object> ReportUnexpectedCharacter() { | |
| 178 return Handle<Object>::null(); | |
| 179 } | |
| 180 | |
| 181 inline Isolate* isolate() { return isolate_; } | |
| 182 inline Factory* factory() { return factory_; } | |
| 183 inline Handle<JSFunction> object_constructor() { return object_constructor_; } | |
| 184 | |
| 185 static const int kInitialSpecialStringLength = 32; | |
| 186 static const int kPretenureTreshold = 100 * 1024; | |
| 187 | |
| 188 | |
| 189 private: | |
| 190 Zone* zone() { return &zone_; } | |
| 191 | |
| 192 void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map, | |
| 193 ZoneList<Handle<Object> >* properties); | |
| 194 | |
| 195 Handle<String> source_; | |
| 196 int source_length_; | |
| 197 Handle<SeqOneByteString> seq_source_; | |
| 198 | |
| 199 PretenureFlag pretenure_; | |
| 200 Isolate* isolate_; | |
| 201 Factory* factory_; | |
| 202 Zone zone_; | |
| 203 Handle<JSFunction> object_constructor_; | |
| 204 uc32 c0_; | |
| 205 int position_; | |
| 206 }; | |
| 207 | |
| 208 template <bool seq_one_byte> | |
| 209 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() { | |
| 210 // Advance to the first character (possibly EOS) | |
| 211 AdvanceSkipWhitespace(); | |
| 212 Handle<Object> result = ParseJsonValue(); | |
| 213 if (result.is_null() || c0_ != kEndOfString) { | |
| 214 // Some exception (for example stack overflow) is already pending. | |
| 215 if (isolate_->has_pending_exception()) return Handle<Object>::null(); | |
| 216 | |
| 217 // Parse failed. Current character is the unexpected token. | |
| 218 Factory* factory = this->factory(); | |
| 219 MessageTemplate::Template message; | |
| 220 Handle<String> argument; | |
| 221 | |
| 222 switch (c0_) { | |
| 223 case kEndOfString: | |
| 224 message = MessageTemplate::kUnexpectedEOS; | |
| 225 break; | |
| 226 case '-': | |
| 227 case '0': | |
| 228 case '1': | |
| 229 case '2': | |
| 230 case '3': | |
| 231 case '4': | |
| 232 case '5': | |
| 233 case '6': | |
| 234 case '7': | |
| 235 case '8': | |
| 236 case '9': | |
| 237 message = MessageTemplate::kUnexpectedTokenNumber; | |
| 238 break; | |
| 239 case '"': | |
| 240 message = MessageTemplate::kUnexpectedTokenString; | |
| 241 break; | |
| 242 default: | |
| 243 message = MessageTemplate::kUnexpectedToken; | |
| 244 argument = factory->LookupSingleCharacterStringFromCode(c0_); | |
| 245 break; | |
| 246 } | |
| 247 | |
| 248 Handle<Script> script(factory->NewScript(source_)); | |
| 249 // We should sent compile error event because we compile JSON object in | |
| 250 // separated source file. | |
| 251 isolate()->debug()->OnCompileError(script); | |
| 252 MessageLocation location(script, position_, position_ + 1); | |
| 253 Handle<Object> error = factory->NewSyntaxError(message, argument); | |
| 254 return isolate()->template Throw<Object>(error, &location); | |
| 255 } | |
| 256 return result; | |
| 257 } | |
| 258 | |
| 259 | |
| 260 // Parse any JSON value. | |
| 261 template <bool seq_one_byte> | |
| 262 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() { | |
| 263 StackLimitCheck stack_check(isolate_); | |
| 264 if (stack_check.HasOverflowed()) { | |
| 265 isolate_->StackOverflow(); | |
| 266 return Handle<Object>::null(); | |
| 267 } | |
| 268 | |
| 269 if (stack_check.InterruptRequested()) { | |
| 270 ExecutionAccess access(isolate_); | |
| 271 // Avoid blocking GC in long running parser (v8:3974). | |
| 272 isolate_->stack_guard()->HandleGCInterrupt(); | |
| 273 } | |
| 274 | |
| 275 if (c0_ == '"') return ParseJsonString(); | |
| 276 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber(); | |
| 277 if (c0_ == '{') return ParseJsonObject(); | |
| 278 if (c0_ == '[') return ParseJsonArray(); | |
| 279 if (c0_ == 'f') { | |
| 280 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && | |
| 281 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { | |
| 282 AdvanceSkipWhitespace(); | |
| 283 return factory()->false_value(); | |
| 284 } | |
| 285 return ReportUnexpectedCharacter(); | |
| 286 } | |
| 287 if (c0_ == 't') { | |
| 288 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && | |
| 289 AdvanceGetChar() == 'e') { | |
| 290 AdvanceSkipWhitespace(); | |
| 291 return factory()->true_value(); | |
| 292 } | |
| 293 return ReportUnexpectedCharacter(); | |
| 294 } | |
| 295 if (c0_ == 'n') { | |
| 296 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && | |
| 297 AdvanceGetChar() == 'l') { | |
| 298 AdvanceSkipWhitespace(); | |
| 299 return factory()->null_value(); | |
| 300 } | |
| 301 return ReportUnexpectedCharacter(); | |
| 302 } | |
| 303 return ReportUnexpectedCharacter(); | |
| 304 } | |
| 305 | |
| 306 | |
| 307 template <bool seq_one_byte> | |
| 308 ParseElementResult JsonParser<seq_one_byte>::ParseElement( | |
| 309 Handle<JSObject> json_object) { | |
| 310 uint32_t index = 0; | |
| 311 // Maybe an array index, try to parse it. | |
| 312 if (c0_ == '0') { | |
| 313 // With a leading zero, the string has to be "0" only to be an index. | |
| 314 Advance(); | |
| 315 } else { | |
| 316 do { | |
| 317 int d = c0_ - '0'; | |
| 318 if (index > 429496729U - ((d + 3) >> 3)) break; | |
| 319 index = (index * 10) + d; | |
| 320 Advance(); | |
| 321 } while (IsDecimalDigit(c0_)); | |
| 322 } | |
| 323 | |
| 324 if (c0_ == '"') { | |
| 325 // Successfully parsed index, parse and store element. | |
| 326 AdvanceSkipWhitespace(); | |
| 327 | |
| 328 if (c0_ == ':') { | |
| 329 AdvanceSkipWhitespace(); | |
| 330 Handle<Object> value = ParseJsonValue(); | |
| 331 if (!value.is_null()) { | |
| 332 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE) | |
| 333 .Assert(); | |
| 334 return kElementFound; | |
| 335 } else { | |
| 336 return kNullHandle; | |
| 337 } | |
| 338 } | |
| 339 } | |
| 340 return kElementNotFound; | |
| 341 } | |
| 342 | |
| 343 // Parse a JSON object. Position must be right at '{'. | |
| 344 template <bool seq_one_byte> | |
| 345 Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() { | |
| 346 HandleScope scope(isolate()); | |
| 347 Handle<JSObject> json_object = | |
| 348 factory()->NewJSObject(object_constructor(), pretenure_); | |
| 349 Handle<Map> map(json_object->map()); | |
| 350 int descriptor = 0; | |
| 351 ZoneList<Handle<Object> > properties(8, zone()); | |
| 352 DCHECK_EQ(c0_, '{'); | |
| 353 | |
| 354 bool transitioning = true; | |
| 355 | |
| 356 AdvanceSkipWhitespace(); | |
| 357 if (c0_ != '}') { | |
| 358 do { | |
| 359 if (c0_ != '"') return ReportUnexpectedCharacter(); | |
| 360 | |
| 361 int start_position = position_; | |
| 362 Advance(); | |
| 363 | |
| 364 if (IsDecimalDigit(c0_)) { | |
| 365 ParseElementResult element_result = ParseElement(json_object); | |
| 366 if (element_result == kNullHandle) return Handle<Object>::null(); | |
| 367 if (element_result == kElementFound) continue; | |
| 368 } | |
| 369 // Not an index, fallback to the slow path. | |
| 370 | |
| 371 position_ = start_position; | |
| 372 #ifdef DEBUG | |
| 373 c0_ = '"'; | |
| 374 #endif | |
| 375 | |
| 376 Handle<String> key; | |
| 377 Handle<Object> value; | |
| 378 | |
| 379 // Try to follow existing transitions as long as possible. Once we stop | |
| 380 // transitioning, no transition can be found anymore. | |
| 381 DCHECK(transitioning); | |
| 382 // First check whether there is a single expected transition. If so, try | |
| 383 // to parse it first. | |
| 384 bool follow_expected = false; | |
| 385 Handle<Map> target; | |
| 386 if (seq_one_byte) { | |
| 387 key = TransitionArray::ExpectedTransitionKey(map); | |
| 388 follow_expected = !key.is_null() && ParseJsonString(key); | |
| 389 } | |
| 390 // If the expected transition hits, follow it. | |
| 391 if (follow_expected) { | |
| 392 target = TransitionArray::ExpectedTransitionTarget(map); | |
| 393 } else { | |
| 394 // If the expected transition failed, parse an internalized string and | |
| 395 // try to find a matching transition. | |
| 396 key = ParseJsonInternalizedString(); | |
| 397 if (key.is_null()) return ReportUnexpectedCharacter(); | |
| 398 | |
| 399 target = TransitionArray::FindTransitionToField(map, key); | |
| 400 // If a transition was found, follow it and continue. | |
| 401 transitioning = !target.is_null(); | |
| 402 } | |
| 403 if (c0_ != ':') return ReportUnexpectedCharacter(); | |
| 404 | |
| 405 AdvanceSkipWhitespace(); | |
| 406 value = ParseJsonValue(); | |
| 407 if (value.is_null()) return ReportUnexpectedCharacter(); | |
| 408 | |
| 409 if (transitioning) { | |
| 410 PropertyDetails details = | |
| 411 target->instance_descriptors()->GetDetails(descriptor); | |
| 412 Representation expected_representation = details.representation(); | |
| 413 | |
| 414 if (value->FitsRepresentation(expected_representation)) { | |
| 415 if (expected_representation.IsHeapObject() && | |
| 416 !target->instance_descriptors() | |
| 417 ->GetFieldType(descriptor) | |
| 418 ->NowContains(value)) { | |
| 419 Handle<HeapType> value_type( | |
| 420 value->OptimalType(isolate(), expected_representation)); | |
| 421 Map::GeneralizeFieldType(target, descriptor, | |
| 422 expected_representation, value_type); | |
| 423 } | |
| 424 DCHECK(target->instance_descriptors() | |
| 425 ->GetFieldType(descriptor) | |
| 426 ->NowContains(value)); | |
| 427 properties.Add(value, zone()); | |
| 428 map = target; | |
| 429 descriptor++; | |
| 430 continue; | |
| 431 } else { | |
| 432 transitioning = false; | |
| 433 } | |
| 434 } | |
| 435 | |
| 436 DCHECK(!transitioning); | |
| 437 | |
| 438 // Commit the intermediate state to the object and stop transitioning. | |
| 439 CommitStateToJsonObject(json_object, map, &properties); | |
| 440 | |
| 441 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value) | |
| 442 .Check(); | |
| 443 } while (transitioning && MatchSkipWhiteSpace(',')); | |
| 444 | |
| 445 // If we transitioned until the very end, transition the map now. | |
| 446 if (transitioning) { | |
| 447 CommitStateToJsonObject(json_object, map, &properties); | |
| 448 } else { | |
| 449 while (MatchSkipWhiteSpace(',')) { | |
| 450 HandleScope local_scope(isolate()); | |
| 451 if (c0_ != '"') return ReportUnexpectedCharacter(); | |
| 452 | |
| 453 int start_position = position_; | |
| 454 Advance(); | |
| 455 | |
| 456 if (IsDecimalDigit(c0_)) { | |
| 457 ParseElementResult element_result = ParseElement(json_object); | |
| 458 if (element_result == kNullHandle) return Handle<Object>::null(); | |
| 459 if (element_result == kElementFound) continue; | |
| 460 } | |
| 461 // Not an index, fallback to the slow path. | |
| 462 | |
| 463 position_ = start_position; | |
| 464 #ifdef DEBUG | |
| 465 c0_ = '"'; | |
| 466 #endif | |
| 467 | |
| 468 Handle<String> key; | |
| 469 Handle<Object> value; | |
| 470 | |
| 471 key = ParseJsonInternalizedString(); | |
| 472 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); | |
| 473 | |
| 474 AdvanceSkipWhitespace(); | |
| 475 value = ParseJsonValue(); | |
| 476 if (value.is_null()) return ReportUnexpectedCharacter(); | |
| 477 | |
| 478 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, | |
| 479 value).Check(); | |
| 480 } | |
| 481 } | |
| 482 | |
| 483 if (c0_ != '}') { | |
| 484 return ReportUnexpectedCharacter(); | |
| 485 } | |
| 486 } | |
| 487 AdvanceSkipWhitespace(); | |
| 488 return scope.CloseAndEscape(json_object); | |
| 489 } | |
| 490 | |
| 491 | |
| 492 template <bool seq_one_byte> | |
| 493 void JsonParser<seq_one_byte>::CommitStateToJsonObject( | |
| 494 Handle<JSObject> json_object, Handle<Map> map, | |
| 495 ZoneList<Handle<Object> >* properties) { | |
| 496 JSObject::AllocateStorageForMap(json_object, map); | |
| 497 DCHECK(!json_object->map()->is_dictionary_map()); | |
| 498 | |
| 499 DisallowHeapAllocation no_gc; | |
| 500 | |
| 501 int length = properties->length(); | |
| 502 for (int i = 0; i < length; i++) { | |
| 503 Handle<Object> value = (*properties)[i]; | |
| 504 json_object->WriteToField(i, *value); | |
| 505 } | |
| 506 } | |
| 507 | |
| 508 | |
| 509 // Parse a JSON array. Position must be right at '['. | |
| 510 template <bool seq_one_byte> | |
| 511 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() { | |
| 512 HandleScope scope(isolate()); | |
| 513 ZoneList<Handle<Object> > elements(4, zone()); | |
| 514 DCHECK_EQ(c0_, '['); | |
| 515 | |
| 516 AdvanceSkipWhitespace(); | |
| 517 if (c0_ != ']') { | |
| 518 do { | |
| 519 Handle<Object> element = ParseJsonValue(); | |
| 520 if (element.is_null()) return ReportUnexpectedCharacter(); | |
| 521 elements.Add(element, zone()); | |
| 522 } while (MatchSkipWhiteSpace(',')); | |
| 523 if (c0_ != ']') { | |
| 524 return ReportUnexpectedCharacter(); | |
| 525 } | |
| 526 } | |
| 527 AdvanceSkipWhitespace(); | |
| 528 // Allocate a fixed array with all the elements. | |
| 529 Handle<FixedArray> fast_elements = | |
| 530 factory()->NewFixedArray(elements.length(), pretenure_); | |
| 531 for (int i = 0, n = elements.length(); i < n; i++) { | |
| 532 fast_elements->set(i, *elements[i]); | |
| 533 } | |
| 534 Handle<Object> json_array = factory()->NewJSArrayWithElements( | |
| 535 fast_elements, FAST_ELEMENTS, Strength::WEAK, pretenure_); | |
| 536 return scope.CloseAndEscape(json_array); | |
| 537 } | |
| 538 | |
| 539 | |
| 540 template <bool seq_one_byte> | |
| 541 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() { | |
| 542 bool negative = false; | |
| 543 int beg_pos = position_; | |
| 544 if (c0_ == '-') { | |
| 545 Advance(); | |
| 546 negative = true; | |
| 547 } | |
| 548 if (c0_ == '0') { | |
| 549 Advance(); | |
| 550 // Prefix zero is only allowed if it's the only digit before | |
| 551 // a decimal point or exponent. | |
| 552 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
| 553 } else { | |
| 554 int i = 0; | |
| 555 int digits = 0; | |
| 556 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); | |
| 557 do { | |
| 558 i = i * 10 + c0_ - '0'; | |
| 559 digits++; | |
| 560 Advance(); | |
| 561 } while (IsDecimalDigit(c0_)); | |
| 562 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { | |
| 563 SkipWhitespace(); | |
| 564 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); | |
| 565 } | |
| 566 } | |
| 567 if (c0_ == '.') { | |
| 568 Advance(); | |
| 569 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
| 570 do { | |
| 571 Advance(); | |
| 572 } while (IsDecimalDigit(c0_)); | |
| 573 } | |
| 574 if (AsciiAlphaToLower(c0_) == 'e') { | |
| 575 Advance(); | |
| 576 if (c0_ == '-' || c0_ == '+') Advance(); | |
| 577 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
| 578 do { | |
| 579 Advance(); | |
| 580 } while (IsDecimalDigit(c0_)); | |
| 581 } | |
| 582 int length = position_ - beg_pos; | |
| 583 double number; | |
| 584 if (seq_one_byte) { | |
| 585 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length); | |
| 586 number = StringToDouble(isolate()->unicode_cache(), chars, | |
| 587 NO_FLAGS, // Hex, octal or trailing junk. | |
| 588 std::numeric_limits<double>::quiet_NaN()); | |
| 589 } else { | |
| 590 Vector<uint8_t> buffer = Vector<uint8_t>::New(length); | |
| 591 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); | |
| 592 Vector<const uint8_t> result = | |
| 593 Vector<const uint8_t>(buffer.start(), length); | |
| 594 number = StringToDouble(isolate()->unicode_cache(), | |
| 595 result, | |
| 596 NO_FLAGS, // Hex, octal or trailing junk. | |
| 597 0.0); | |
| 598 buffer.Dispose(); | |
| 599 } | |
| 600 SkipWhitespace(); | |
| 601 return factory()->NewNumber(number, pretenure_); | |
| 602 } | |
| 603 | |
| 604 | |
| 605 template <typename StringType> | |
| 606 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); | |
| 607 | |
| 608 template <> | |
| 609 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { | |
| 610 seq_str->SeqTwoByteStringSet(i, c); | |
| 611 } | |
| 612 | |
| 613 template <> | |
| 614 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) { | |
| 615 seq_str->SeqOneByteStringSet(i, c); | |
| 616 } | |
| 617 | |
| 618 template <typename StringType> | |
| 619 inline Handle<StringType> NewRawString(Factory* factory, | |
| 620 int length, | |
| 621 PretenureFlag pretenure); | |
| 622 | |
| 623 template <> | |
| 624 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, | |
| 625 int length, | |
| 626 PretenureFlag pretenure) { | |
| 627 return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked(); | |
| 628 } | |
| 629 | |
| 630 template <> | |
| 631 inline Handle<SeqOneByteString> NewRawString(Factory* factory, | |
| 632 int length, | |
| 633 PretenureFlag pretenure) { | |
| 634 return factory->NewRawOneByteString(length, pretenure).ToHandleChecked(); | |
| 635 } | |
| 636 | |
| 637 | |
| 638 // Scans the rest of a JSON string starting from position_ and writes | |
| 639 // prefix[start..end] along with the scanned characters into a | |
| 640 // sequential string of type StringType. | |
| 641 template <bool seq_one_byte> | |
| 642 template <typename StringType, typename SinkChar> | |
| 643 Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString( | |
| 644 Handle<String> prefix, int start, int end) { | |
| 645 int count = end - start; | |
| 646 int max_length = count + source_length_ - position_; | |
| 647 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); | |
| 648 Handle<StringType> seq_string = | |
| 649 NewRawString<StringType>(factory(), length, pretenure_); | |
| 650 // Copy prefix into seq_str. | |
| 651 SinkChar* dest = seq_string->GetChars(); | |
| 652 String::WriteToFlat(*prefix, dest, start, end); | |
| 653 | |
| 654 while (c0_ != '"') { | |
| 655 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
| 656 if (c0_ < 0x20) return Handle<String>::null(); | |
| 657 if (count >= length) { | |
| 658 // We need to create a longer sequential string for the result. | |
| 659 return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count); | |
| 660 } | |
| 661 if (c0_ != '\\') { | |
| 662 // If the sink can contain UC16 characters, or source_ contains only | |
| 663 // Latin1 characters, there's no need to test whether we can store the | |
| 664 // character. Otherwise check whether the UC16 source character can fit | |
| 665 // in the Latin1 sink. | |
| 666 if (sizeof(SinkChar) == kUC16Size || seq_one_byte || | |
| 667 c0_ <= String::kMaxOneByteCharCode) { | |
| 668 SeqStringSet(seq_string, count++, c0_); | |
| 669 Advance(); | |
| 670 } else { | |
| 671 // StringType is SeqOneByteString and we just read a non-Latin1 char. | |
| 672 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count); | |
| 673 } | |
| 674 } else { | |
| 675 Advance(); // Advance past the \. | |
| 676 switch (c0_) { | |
| 677 case '"': | |
| 678 case '\\': | |
| 679 case '/': | |
| 680 SeqStringSet(seq_string, count++, c0_); | |
| 681 break; | |
| 682 case 'b': | |
| 683 SeqStringSet(seq_string, count++, '\x08'); | |
| 684 break; | |
| 685 case 'f': | |
| 686 SeqStringSet(seq_string, count++, '\x0c'); | |
| 687 break; | |
| 688 case 'n': | |
| 689 SeqStringSet(seq_string, count++, '\x0a'); | |
| 690 break; | |
| 691 case 'r': | |
| 692 SeqStringSet(seq_string, count++, '\x0d'); | |
| 693 break; | |
| 694 case 't': | |
| 695 SeqStringSet(seq_string, count++, '\x09'); | |
| 696 break; | |
| 697 case 'u': { | |
| 698 uc32 value = 0; | |
| 699 for (int i = 0; i < 4; i++) { | |
| 700 Advance(); | |
| 701 int digit = HexValue(c0_); | |
| 702 if (digit < 0) { | |
| 703 return Handle<String>::null(); | |
| 704 } | |
| 705 value = value * 16 + digit; | |
| 706 } | |
| 707 if (sizeof(SinkChar) == kUC16Size || | |
| 708 value <= String::kMaxOneByteCharCode) { | |
| 709 SeqStringSet(seq_string, count++, value); | |
| 710 break; | |
| 711 } else { | |
| 712 // StringType is SeqOneByteString and we just read a non-Latin1 | |
| 713 // char. | |
| 714 position_ -= 6; // Rewind position_ to \ in \uxxxx. | |
| 715 Advance(); | |
| 716 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, | |
| 717 0, | |
| 718 count); | |
| 719 } | |
| 720 } | |
| 721 default: | |
| 722 return Handle<String>::null(); | |
| 723 } | |
| 724 Advance(); | |
| 725 } | |
| 726 } | |
| 727 | |
| 728 DCHECK_EQ('"', c0_); | |
| 729 // Advance past the last '"'. | |
| 730 AdvanceSkipWhitespace(); | |
| 731 | |
| 732 // Shrink seq_string length to count and return. | |
| 733 return SeqString::Truncate(seq_string, count); | |
| 734 } | |
| 735 | |
| 736 | |
| 737 template <bool seq_one_byte> | |
| 738 template <bool is_internalized> | |
| 739 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() { | |
| 740 DCHECK_EQ('"', c0_); | |
| 741 Advance(); | |
| 742 if (c0_ == '"') { | |
| 743 AdvanceSkipWhitespace(); | |
| 744 return factory()->empty_string(); | |
| 745 } | |
| 746 | |
| 747 if (seq_one_byte && is_internalized) { | |
| 748 // Fast path for existing internalized strings. If the the string being | |
| 749 // parsed is not a known internalized string, contains backslashes or | |
| 750 // unexpectedly reaches the end of string, return with an empty handle. | |
| 751 uint32_t running_hash = isolate()->heap()->HashSeed(); | |
| 752 int position = position_; | |
| 753 uc32 c0 = c0_; | |
| 754 do { | |
| 755 if (c0 == '\\') { | |
| 756 c0_ = c0; | |
| 757 int beg_pos = position_; | |
| 758 position_ = position; | |
| 759 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, | |
| 760 beg_pos, | |
| 761 position_); | |
| 762 } | |
| 763 if (c0 < 0x20) return Handle<String>::null(); | |
| 764 if (static_cast<uint32_t>(c0) > | |
| 765 unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
| 766 running_hash = | |
| 767 StringHasher::AddCharacterCore(running_hash, | |
| 768 unibrow::Utf16::LeadSurrogate(c0)); | |
| 769 running_hash = | |
| 770 StringHasher::AddCharacterCore(running_hash, | |
| 771 unibrow::Utf16::TrailSurrogate(c0)); | |
| 772 } else { | |
| 773 running_hash = StringHasher::AddCharacterCore(running_hash, c0); | |
| 774 } | |
| 775 position++; | |
| 776 if (position >= source_length_) return Handle<String>::null(); | |
| 777 c0 = seq_source_->SeqOneByteStringGet(position); | |
| 778 } while (c0 != '"'); | |
| 779 int length = position - position_; | |
| 780 uint32_t hash = (length <= String::kMaxHashCalcLength) | |
| 781 ? StringHasher::GetHashCore(running_hash) | |
| 782 : static_cast<uint32_t>(length); | |
| 783 Vector<const uint8_t> string_vector( | |
| 784 seq_source_->GetChars() + position_, length); | |
| 785 StringTable* string_table = isolate()->heap()->string_table(); | |
| 786 uint32_t capacity = string_table->Capacity(); | |
| 787 uint32_t entry = StringTable::FirstProbe(hash, capacity); | |
| 788 uint32_t count = 1; | |
| 789 Handle<String> result; | |
| 790 while (true) { | |
| 791 Object* element = string_table->KeyAt(entry); | |
| 792 if (element == isolate()->heap()->undefined_value()) { | |
| 793 // Lookup failure. | |
| 794 result = factory()->InternalizeOneByteString( | |
| 795 seq_source_, position_, length); | |
| 796 break; | |
| 797 } | |
| 798 if (element != isolate()->heap()->the_hole_value() && | |
| 799 String::cast(element)->IsOneByteEqualTo(string_vector)) { | |
| 800 result = Handle<String>(String::cast(element), isolate()); | |
| 801 #ifdef DEBUG | |
| 802 uint32_t hash_field = | |
| 803 (hash << String::kHashShift) | String::kIsNotArrayIndexMask; | |
| 804 DCHECK_EQ(static_cast<int>(result->Hash()), | |
| 805 static_cast<int>(hash_field >> String::kHashShift)); | |
| 806 #endif | |
| 807 break; | |
| 808 } | |
| 809 entry = StringTable::NextProbe(entry, count++, capacity); | |
| 810 } | |
| 811 position_ = position; | |
| 812 // Advance past the last '"'. | |
| 813 AdvanceSkipWhitespace(); | |
| 814 return result; | |
| 815 } | |
| 816 | |
| 817 int beg_pos = position_; | |
| 818 // Fast case for Latin1 only without escape characters. | |
| 819 do { | |
| 820 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
| 821 if (c0_ < 0x20) return Handle<String>::null(); | |
| 822 if (c0_ != '\\') { | |
| 823 if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) { | |
| 824 Advance(); | |
| 825 } else { | |
| 826 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, | |
| 827 beg_pos, | |
| 828 position_); | |
| 829 } | |
| 830 } else { | |
| 831 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, | |
| 832 beg_pos, | |
| 833 position_); | |
| 834 } | |
| 835 } while (c0_ != '"'); | |
| 836 int length = position_ - beg_pos; | |
| 837 Handle<String> result = | |
| 838 factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked(); | |
| 839 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(); | |
| 840 String::WriteToFlat(*source_, dest, beg_pos, position_); | |
| 841 | |
| 842 DCHECK_EQ('"', c0_); | |
| 843 // Advance past the last '"'. | |
| 844 AdvanceSkipWhitespace(); | |
| 845 return result; | |
| 846 } | |
| 847 | |
| 848 } // namespace internal | |
| 849 } // namespace v8 | |
| 850 | |
| 851 #endif // V8_JSON_PARSER_H_ | |
| OLD | NEW |