OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_JSON_PARSER_H_ | 5 #ifndef V8_JSON_PARSER_H_ |
6 #define V8_JSON_PARSER_H_ | 6 #define V8_JSON_PARSER_H_ |
7 | 7 |
8 #include "src/char-predicates.h" | |
9 #include "src/conversions.h" | |
10 #include "src/debug/debug.h" | |
11 #include "src/factory.h" | 8 #include "src/factory.h" |
12 #include "src/field-type.h" | 9 #include "src/objects.h" |
13 #include "src/messages.h" | |
14 #include "src/parsing/scanner.h" | |
15 #include "src/parsing/token.h" | |
16 #include "src/transitions.h" | |
17 | 10 |
18 namespace v8 { | 11 namespace v8 { |
19 namespace internal { | 12 namespace internal { |
20 | 13 |
// Outcome of JsonParser::ParseElement: kElementFound when an indexed element
// was parsed and stored, kNullHandle when parsing the element's value yielded
// a null handle, and kElementNotFound when the key was not handled as an index.
21 enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle }; | 14 enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle }; |
22 | 15 |
23 | 16 |
24 // A simple json parser. | 17 // A simple json parser. |
25 template <bool seq_one_byte> | 18 template <bool seq_one_byte> |
26 class JsonParser BASE_EMBEDDED { | 19 class JsonParser BASE_EMBEDDED { |
27 public: | 20 public: |
// Convenience entry point: constructs a temporary JsonParser over |source| and
// returns the result of its ParseJson().
28 MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) { | 21 MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) { |
29 return JsonParser(source).ParseJson(); | 22 return JsonParser(source).ParseJson(); |
30 } | 23 } |
31 | 24 |
32 static const int kEndOfString = -1; | 25 static const int kEndOfString = -1; |
33 | 26 |
34 private: | 27 private: |
// Constructor. The OLD column defines it inline; the NEW column keeps only the
// declaration. It caches the isolate, factory and object constructor, flattens
// the source string and decides whether allocations should be pretenured.
35 explicit JsonParser(Handle<String> source) | 28 explicit JsonParser(Handle<String> source); |
36 : source_(source), | |
37 source_length_(source->length()), | |
38 isolate_(source->map()->GetHeap()->isolate()), | |
39 factory_(isolate_->factory()), | |
40 zone_(isolate_->allocator()), | |
41 object_constructor_(isolate_->native_context()->object_function(), | |
42 isolate_), | |
// position_ starts at -1 so that the first Advance() moves to index 0.
43 position_(-1) { | |
44 source_ = String::Flatten(source_); | |
// Sources of at least kPretenureTreshold characters allocate as TENURED.
45 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED; | |
46 | |
47 // Optimized fast case where we only have Latin1 characters. | |
48 if (seq_one_byte) { | |
49 seq_source_ = Handle<SeqOneByteString>::cast(source_); | |
50 } | |
51 } | |
52 | 29 |
53 // Parse a string containing a single JSON value. | 30 // Parse a string containing a single JSON value. |
54 MaybeHandle<Object> ParseJson(); | 31 MaybeHandle<Object> ParseJson(); |
55 | 32 |
// Moves to the next source position and loads its character into c0_.
// Past the end of the source, c0_ becomes kEndOfString. When seq_one_byte is
// set the character is read from seq_source_, otherwise from source_.
56 inline void Advance() { | 33 INLINE(void Advance()); |
57 position_++; | |
58 if (position_ >= source_length_) { | |
59 c0_ = kEndOfString; | |
60 } else if (seq_one_byte) { | |
61 c0_ = seq_source_->SeqOneByteStringGet(position_); | |
62 } else { | |
63 c0_ = source_->Get(position_); | |
64 } | |
65 } | |
66 | 34 |
67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | 35 // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
68 // section 15.12.1.1. The only allowed whitespace characters between tokens | 36 // section 15.12.1.1. The only allowed whitespace characters between tokens |
69 // are tab, carriage-return, newline and space. | 37 // are tab, carriage-return, newline and space. |
70 | 38 |
// Whitespace/advance helpers. In these rows the OLD column holds the inline
// bodies while the NEW column holds only the three INLINE declarations.
//
// AdvanceSkipWhitespace: always advances at least once, then keeps advancing
// while c0_ is a space, tab, newline or carriage return.
71 inline void AdvanceSkipWhitespace() { | 39 INLINE(void AdvanceSkipWhitespace()); |
72 do { | 40 INLINE(void SkipWhitespace()); |
73 Advance(); | 41 INLINE(uc32 AdvanceGetChar()); |
74 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r'); | |
75 } | |
76 | |
// SkipWhitespace: advances only while the current character is whitespace, so
// a non-whitespace c0_ is left untouched.
77 inline void SkipWhitespace() { | |
78 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') { | |
79 Advance(); | |
80 } | |
81 } | |
82 | |
// AdvanceGetChar: advances once and returns the new current character.
83 inline uc32 AdvanceGetChar() { | |
84 Advance(); | |
85 return c0_; | |
86 } | |
87 | 42 |
88 // Checks that the current character is c. | 43 // Checks that the current character is c. |
89 // If so, then consume c and skip whitespace. | 44 // If so, then consume c and skip whitespace. |
// Returns true when c was consumed; otherwise returns false and leaves the
// parser position unchanged.
90 inline bool MatchSkipWhiteSpace(uc32 c) { | 45 INLINE(bool MatchSkipWhiteSpace(uc32 c)); |
91 if (c0_ == c) { | |
92 AdvanceSkipWhitespace(); | |
93 return true; | |
94 } | |
95 return false; | |
96 } | |
97 | 46 |
98 // A JSON string (production JSONString) is a subset of valid JavaScript string | 47 // A JSON string (production JSONString) is a subset of valid JavaScript string |
99 // literals. The string must only be double-quoted (not single-quoted), and | 48 // literals. The string must only be double-quoted (not single-quoted), and |
100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 49 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 50 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
// This overload delegates to ScanJsonString with is_internalized == false.
102 Handle<String> ParseJsonString() { | 51 Handle<String> ParseJsonString() { |
103 return ScanJsonString<false>(); | 52 return ScanJsonString<false>(); |
104 } | 53 } |
105 | 54 |
// Tries to match the quoted string at the current position against |expected|
// without allocating. Returns true and consumes the string (plus trailing
// whitespace) only on an exact match; otherwise returns false and leaves
// position_ unchanged. Only a one-byte |expected| is handled, and the input is
// read through seq_source_.
106 bool ParseJsonString(Handle<String> expected) { | 55 bool ParseJsonString(Handle<String> expected); |
107 int length = expected->length(); | |
// Require room for |length| characters plus the closing quote after the
// opening quote at position_, so input_chars[length] below stays in bounds.
108 if (source_->length() - position_ - 1 > length) { | |
109 DisallowHeapAllocation no_gc; | |
110 String::FlatContent content = expected->GetFlatContent(); | |
111 if (content.IsOneByte()) { | |
112 DCHECK_EQ('"', c0_); | |
113 const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1; | |
114 const uint8_t* expected_chars = content.ToOneByteVector().start(); | |
115 for (int i = 0; i < length; i++) { | |
116 uint8_t c0 = input_chars[i]; | |
// Besides a plain mismatch, bail out on a quote, a control character or a
// backslash in the input.
117 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') { | |
118 return false; | |
119 } | |
120 } | |
121 if (input_chars[length] == '"') { | |
// Move position_ onto the closing quote, then step past it.
122 position_ = position_ + length + 1; | |
123 AdvanceSkipWhitespace(); | |
124 return true; | |
125 } | |
126 } | |
127 } | |
128 return false; | |
129 } | |
130 | 56 |
// Scans a JSON string via ScanJsonString<true>() and returns it internalized.
// A null handle from the scan is passed through unchanged.
131 Handle<String> ParseJsonInternalizedString() { | 57 Handle<String> ParseJsonInternalizedString() { |
132 Handle<String> result = ScanJsonString<true>(); | 58 Handle<String> result = ScanJsonString<true>(); |
133 if (result.is_null()) return result; | 59 if (result.is_null()) return result; |
134 return factory()->InternalizeString(result); | 60 return factory()->InternalizeString(result); |
135 } | 61 } |
136 | 62 |
// Scans the JSON string that starts at the current '"'. Returns a null handle
// for malformed input (see the definition in the OLD column).
137 template <bool is_internalized> | 63 template <bool is_internalized> |
138 Handle<String> ScanJsonString(); | 64 Handle<String> ScanJsonString(); |
139 // Creates a new string and copies prefix[start..end] into the beginning | 65 // Creates a new string and copies prefix[start..end] into the beginning |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
181 return Handle<Object>::null(); | 107 return Handle<Object>::null(); |
182 } | 108 } |
183 | 109 |
// Trivial accessors for the values cached by the constructor.
184 inline Isolate* isolate() { return isolate_; } | 110 inline Isolate* isolate() { return isolate_; } |
185 inline Factory* factory() { return factory_; } | 111 inline Factory* factory() { return factory_; } |
186 inline Handle<JSFunction> object_constructor() { return object_constructor_; } | 112 inline Handle<JSFunction> object_constructor() { return object_constructor_; } |
187 | 113 |
// Minimum length used for the result buffer in SlowScanJsonString.
188 static const int kInitialSpecialStringLength = 32; | 114 static const int kInitialSpecialStringLength = 32; |
// Source length (in characters) from which allocations are made TENURED.
189 static const int kPretenureTreshold = 100 * 1024; | 115 static const int kPretenureTreshold = 100 * 1024; |
190 | 116 |
191 | |
192 private: | 117 private: |
// Zone backing the temporary ZoneLists built while parsing objects and arrays.
193 Zone* zone() { return &zone_; } | 118 Zone* zone() { return &zone_; } |
194 | 119 |
// Allocates storage on |json_object| for |map| and writes the collected
// |properties| into its fields.
195 void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map, | 120 void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map, |
196 ZoneList<Handle<Object> >* properties); | 121 ZoneList<Handle<Object> >* properties); |
197 | 122 |
// Parser state: the flattened source, its length at construction time, and the
// SeqOneByteString view that is only set when seq_one_byte is true.
198 Handle<String> source_; | 123 Handle<String> source_; |
199 int source_length_; | 124 int source_length_; |
200 Handle<SeqOneByteString> seq_source_; | 125 Handle<SeqOneByteString> seq_source_; |
201 | 126 |
202 PretenureFlag pretenure_; | 127 PretenureFlag pretenure_; |
203 Isolate* isolate_; | 128 Isolate* isolate_; |
204 Factory* factory_; | 129 Factory* factory_; |
205 Zone zone_; | 130 Zone zone_; |
206 Handle<JSFunction> object_constructor_; | 131 Handle<JSFunction> object_constructor_; |
// c0_ is the current character (or kEndOfString); position_ is its index.
207 uc32 c0_; | 132 uc32 c0_; |
208 int position_; | 133 int position_; |
209 }; | 134 }; |
210 | 135 |
// Parses the entire source as a single JSON value. If the value handle is null
// or input remains after the value, this either propagates the already pending
// exception or throws a SyntaxError whose message template is chosen from the
// current character c0_. Present only in the OLD column of this diff.
211 template <bool seq_one_byte> | |
212 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() { | |
213 // Advance to the first character (possibly EOS) | |
214 AdvanceSkipWhitespace(); | |
215 Handle<Object> result = ParseJsonValue(); | |
216 if (result.is_null() || c0_ != kEndOfString) { | |
217 // Some exception (for example stack overflow) is already pending. | |
218 if (isolate_->has_pending_exception()) return Handle<Object>::null(); | |
219 | |
220 // Parse failed. Current character is the unexpected token. | |
221 Factory* factory = this->factory(); | |
222 MessageTemplate::Template message; | |
// By default arg1 is the error position; arg2 stays empty.
223 Handle<Object> arg1 = Handle<Smi>(Smi::FromInt(position_), isolate()); | |
224 Handle<Object> arg2; | |
225 | |
226 switch (c0_) { | |
227 case kEndOfString: | |
228 message = MessageTemplate::kJsonParseUnexpectedEOS; | |
229 break; | |
230 case '-': | |
231 case '0': | |
232 case '1': | |
233 case '2': | |
234 case '3': | |
235 case '4': | |
236 case '5': | |
237 case '6': | |
238 case '7': | |
239 case '8': | |
240 case '9': | |
241 message = MessageTemplate::kJsonParseUnexpectedTokenNumber; | |
242 break; | |
243 case '"': | |
244 message = MessageTemplate::kJsonParseUnexpectedTokenString; | |
245 break; | |
246 default: | |
// Generic token: arg1 becomes the offending character as a string and the
// position moves to arg2.
247 message = MessageTemplate::kJsonParseUnexpectedToken; | |
248 arg2 = arg1; | |
249 arg1 = factory->LookupSingleCharacterStringFromCode(c0_); | |
250 break; | |
251 } | |
252 | |
253 Handle<Script> script(factory->NewScript(source_)); | |
254 // We should send a compile error event because we compile the JSON object | |
255 // in a separate source file. | |
256 isolate()->debug()->OnCompileError(script); | |
257 MessageLocation location(script, position_, position_ + 1); | |
258 Handle<Object> error = factory->NewSyntaxError(message, arg1, arg2); | |
259 return isolate()->template Throw<Object>(error, &location); | |
260 } | |
261 return result; | |
262 } | |
263 | |
264 | |
265 // Parse any JSON value. | |
// Dispatches on the current character c0_ to the string, number, object or
// array parser, or matches the literals false/true/null character by
// character. Returns a null handle on stack overflow; any other unexpected
// character yields the result of ReportUnexpectedCharacter().
266 template <bool seq_one_byte> | |
267 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() { | |
268 StackLimitCheck stack_check(isolate_); | |
269 if (stack_check.HasOverflowed()) { | |
270 isolate_->StackOverflow(); | |
271 return Handle<Object>::null(); | |
272 } | |
273 | |
274 if (stack_check.InterruptRequested()) { | |
275 ExecutionAccess access(isolate_); | |
276 // Avoid blocking GC in long running parser (v8:3974). | |
277 isolate_->stack_guard()->HandleGCInterrupt(); | |
278 } | |
279 | |
280 if (c0_ == '"') return ParseJsonString(); | |
281 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber(); | |
282 if (c0_ == '{') return ParseJsonObject(); | |
283 if (c0_ == '[') return ParseJsonArray(); | |
// Literal "false": the && chain stops advancing at the first mismatch.
284 if (c0_ == 'f') { | |
285 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && | |
286 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { | |
287 AdvanceSkipWhitespace(); | |
288 return factory()->false_value(); | |
289 } | |
290 return ReportUnexpectedCharacter(); | |
291 } | |
// Literal "true".
292 if (c0_ == 't') { | |
293 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && | |
294 AdvanceGetChar() == 'e') { | |
295 AdvanceSkipWhitespace(); | |
296 return factory()->true_value(); | |
297 } | |
298 return ReportUnexpectedCharacter(); | |
299 } | |
// Literal "null".
300 if (c0_ == 'n') { | |
301 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && | |
302 AdvanceGetChar() == 'l') { | |
303 AdvanceSkipWhitespace(); | |
304 return factory()->null_value(); | |
305 } | |
306 return ReportUnexpectedCharacter(); | |
307 } | |
308 return ReportUnexpectedCharacter(); | |
309 } | |
310 | |
311 | |
// Tries to read the current object key as an array index and, if it is one,
// parses the value after ':' and stores it as an element of |json_object|.
// The callers in ParseJsonObject invoke this with c0_ on the first decimal
// digit after the key's opening quote.
// Returns kElementFound after storing the element, kNullHandle when the value
// parse returned a null handle, and kElementNotFound in every other case (the
// caller then rewinds and re-parses the key as a regular string).
312 template <bool seq_one_byte> | |
313 ParseElementResult JsonParser<seq_one_byte>::ParseElement( | |
314 Handle<JSObject> json_object) { | |
315 uint32_t index = 0; | |
316 // Maybe an array index, try to parse it. | |
317 if (c0_ == '0') { | |
318 // With a leading zero, the string has to be "0" only to be an index. | |
319 Advance(); | |
320 } else { | |
321 do { | |
322 int d = c0_ - '0'; | |
// Overflow guard: stop before index * 10 + d could exceed 4294967294
// (2^32 - 2). Breaking here leaves c0_ on a digit, so the '"' check below
// fails and kElementNotFound is returned.
323 if (index > 429496729U - ((d + 3) >> 3)) break; | |
324 index = (index * 10) + d; | |
325 Advance(); | |
326 } while (IsDecimalDigit(c0_)); | |
327 } | |
328 | |
329 if (c0_ == '"') { | |
330 // Successfully parsed index, parse and store element. | |
331 AdvanceSkipWhitespace(); | |
332 | |
333 if (c0_ == ':') { | |
334 AdvanceSkipWhitespace(); | |
335 Handle<Object> value = ParseJsonValue(); | |
336 if (!value.is_null()) { | |
337 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE) | |
338 .Assert(); | |
339 return kElementFound; | |
340 } else { | |
341 return kNullHandle; | |
342 } | |
343 } | |
344 } | |
345 return kElementNotFound; | |
346 } | |
347 | |
348 // Parse a JSON object. Position must be right at '{'. | |
// Works in two phases. While |transitioning| is true, named properties are
// collected in |properties| and |map| follows existing map transitions. As
// soon as a key or value cannot follow a transition, the collected state is
// committed to the object and the remaining properties are defined one by one
// in the second loop. Keys that look like array indices are handled by
// ParseElement in both phases.
349 template <bool seq_one_byte> | |
350 Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() { | |
351 HandleScope scope(isolate()); | |
352 Handle<JSObject> json_object = | |
353 factory()->NewJSObject(object_constructor(), pretenure_); | |
354 Handle<Map> map(json_object->map()); | |
// Index of the next descriptor to inspect on the transition target map.
355 int descriptor = 0; | |
356 ZoneList<Handle<Object> > properties(8, zone()); | |
357 DCHECK_EQ(c0_, '{'); | |
358 | |
359 bool transitioning = true; | |
360 | |
361 AdvanceSkipWhitespace(); | |
362 if (c0_ != '}') { | |
363 do { | |
364 if (c0_ != '"') return ReportUnexpectedCharacter(); | |
365 | |
// Remember the opening quote so the key can be re-scanned as a string if it
// turns out not to be an array index.
366 int start_position = position_; | |
367 Advance(); | |
368 | |
369 if (IsDecimalDigit(c0_)) { | |
370 ParseElementResult element_result = ParseElement(json_object); | |
371 if (element_result == kNullHandle) return Handle<Object>::null(); | |
// 'continue' in a do-while jumps to the loop condition, i.e. the ',' check.
372 if (element_result == kElementFound) continue; | |
373 } | |
374 // Not an index, fallback to the slow path. | |
375 | |
// Only position_ is restored in release builds; the string scanners call
// Advance() first, which reloads c0_ from the restored position.
376 position_ = start_position; | |
377 #ifdef DEBUG | |
378 c0_ = '"'; | |
379 #endif | |
380 | |
381 Handle<String> key; | |
382 Handle<Object> value; | |
383 | |
384 // Try to follow existing transitions as long as possible. Once we stop | |
385 // transitioning, no transition can be found anymore. | |
386 DCHECK(transitioning); | |
387 // First check whether there is a single expected transition. If so, try | |
388 // to parse it first. | |
389 bool follow_expected = false; | |
390 Handle<Map> target; | |
// The expected-key comparison reads seq_source_, so it is only attempted for
// sequential one-byte sources.
391 if (seq_one_byte) { | |
392 key = TransitionArray::ExpectedTransitionKey(map); | |
393 follow_expected = !key.is_null() && ParseJsonString(key); | |
394 } | |
395 // If the expected transition hits, follow it. | |
396 if (follow_expected) { | |
397 target = TransitionArray::ExpectedTransitionTarget(map); | |
398 } else { | |
399 // If the expected transition failed, parse an internalized string and | |
400 // try to find a matching transition. | |
401 key = ParseJsonInternalizedString(); | |
402 if (key.is_null()) return ReportUnexpectedCharacter(); | |
403 | |
404 target = TransitionArray::FindTransitionToField(map, key); | |
405 // If a transition was found, follow it and continue. | |
406 transitioning = !target.is_null(); | |
407 } | |
408 if (c0_ != ':') return ReportUnexpectedCharacter(); | |
409 | |
410 AdvanceSkipWhitespace(); | |
411 value = ParseJsonValue(); | |
412 if (value.is_null()) return ReportUnexpectedCharacter(); | |
413 | |
414 if (transitioning) { | |
415 PropertyDetails details = | |
416 target->instance_descriptors()->GetDetails(descriptor); | |
417 Representation expected_representation = details.representation(); | |
418 | |
// The transition is only taken when the value fits the representation the
// target map expects for this field; otherwise transitioning stops.
419 if (value->FitsRepresentation(expected_representation)) { | |
420 if (expected_representation.IsHeapObject() && | |
421 !target->instance_descriptors() | |
422 ->GetFieldType(descriptor) | |
423 ->NowContains(value)) { | |
424 Handle<FieldType> value_type( | |
425 value->OptimalType(isolate(), expected_representation)); | |
426 Map::GeneralizeFieldType(target, descriptor, | |
427 expected_representation, value_type); | |
428 } | |
429 DCHECK(target->instance_descriptors() | |
430 ->GetFieldType(descriptor) | |
431 ->NowContains(value)); | |
432 properties.Add(value, zone()); | |
433 map = target; | |
434 descriptor++; | |
435 continue; | |
436 } else { | |
437 transitioning = false; | |
438 } | |
439 } | |
440 | |
441 DCHECK(!transitioning); | |
442 | |
443 // Commit the intermediate state to the object and stop transitioning. | |
444 CommitStateToJsonObject(json_object, map, &properties); | |
445 | |
446 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value) | |
447 .Check(); | |
448 } while (transitioning && MatchSkipWhiteSpace(',')); | |
449 | |
450 // If we transitioned until the very end, transition the map now. | |
451 if (transitioning) { | |
452 CommitStateToJsonObject(json_object, map, &properties); | |
453 } else { | |
// Second phase: no more transitions, define each remaining property directly.
454 while (MatchSkipWhiteSpace(',')) { | |
455 HandleScope local_scope(isolate()); | |
456 if (c0_ != '"') return ReportUnexpectedCharacter(); | |
457 | |
458 int start_position = position_; | |
459 Advance(); | |
460 | |
461 if (IsDecimalDigit(c0_)) { | |
462 ParseElementResult element_result = ParseElement(json_object); | |
463 if (element_result == kNullHandle) return Handle<Object>::null(); | |
464 if (element_result == kElementFound) continue; | |
465 } | |
466 // Not an index, fallback to the slow path. | |
467 | |
468 position_ = start_position; | |
469 #ifdef DEBUG | |
470 c0_ = '"'; | |
471 #endif | |
472 | |
473 Handle<String> key; | |
474 Handle<Object> value; | |
475 | |
476 key = ParseJsonInternalizedString(); | |
477 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); | |
478 | |
479 AdvanceSkipWhitespace(); | |
480 value = ParseJsonValue(); | |
481 if (value.is_null()) return ReportUnexpectedCharacter(); | |
482 | |
483 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, | |
484 value).Check(); | |
485 } | |
486 } | |
487 | |
488 if (c0_ != '}') { | |
489 return ReportUnexpectedCharacter(); | |
490 } | |
491 } | |
492 AdvanceSkipWhitespace(); | |
493 return scope.CloseAndEscape(json_object); | |
494 } | |
495 | |
496 | |
// Allocates storage on |json_object| for |map|, then writes the i-th entry of
// |properties| into field i of the object. The field writes run under a
// DisallowHeapAllocation scope.
497 template <bool seq_one_byte> | |
498 void JsonParser<seq_one_byte>::CommitStateToJsonObject( | |
499 Handle<JSObject> json_object, Handle<Map> map, | |
500 ZoneList<Handle<Object> >* properties) { | |
501 JSObject::AllocateStorageForMap(json_object, map); | |
502 DCHECK(!json_object->map()->is_dictionary_map()); | |
503 | |
504 DisallowHeapAllocation no_gc; | |
505 | |
506 int length = properties->length(); | |
507 for (int i = 0; i < length; i++) { | |
508 Handle<Object> value = (*properties)[i]; | |
509 json_object->WriteToField(i, *value); | |
510 } | |
511 } | |
512 | |
513 | |
514 // Parse a JSON array. Position must be right at '['. | |
// Elements are first gathered in a zone-allocated list, then copied into a
// FixedArray that backs the returned JSArray (FAST_ELEMENTS). A null element
// or a missing ']' yields the result of ReportUnexpectedCharacter().
515 template <bool seq_one_byte> | |
516 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() { | |
517 HandleScope scope(isolate()); | |
518 ZoneList<Handle<Object> > elements(4, zone()); | |
519 DCHECK_EQ(c0_, '['); | |
520 | |
521 AdvanceSkipWhitespace(); | |
522 if (c0_ != ']') { | |
523 do { | |
524 Handle<Object> element = ParseJsonValue(); | |
525 if (element.is_null()) return ReportUnexpectedCharacter(); | |
526 elements.Add(element, zone()); | |
527 } while (MatchSkipWhiteSpace(',')); | |
528 if (c0_ != ']') { | |
529 return ReportUnexpectedCharacter(); | |
530 } | |
531 } | |
// Step past the closing ']'.
532 AdvanceSkipWhitespace(); | |
533 // Allocate a fixed array with all the elements. | |
534 Handle<FixedArray> fast_elements = | |
535 factory()->NewFixedArray(elements.length(), pretenure_); | |
536 for (int i = 0, n = elements.length(); i < n; i++) { | |
537 fast_elements->set(i, *elements[i]); | |
538 } | |
539 Handle<Object> json_array = factory()->NewJSArrayWithElements( | |
540 fast_elements, FAST_ELEMENTS, pretenure_); | |
541 return scope.CloseAndEscape(json_array); | |
542 } | |
543 | |
544 | |
// Parses a JSON number starting at c0_ (a digit or '-').
// Fast path: an integer with fewer than 10 digits and no fraction or exponent
// is returned directly as a Smi. Otherwise the optional fraction and exponent
// are scanned and the substring [beg_pos, position_) is converted with
// StringToDouble. Malformed numbers yield ReportUnexpectedCharacter().
545 template <bool seq_one_byte> | |
546 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() { | |
547 bool negative = false; | |
548 int beg_pos = position_; | |
549 if (c0_ == '-') { | |
550 Advance(); | |
551 negative = true; | |
552 } | |
553 if (c0_ == '0') { | |
554 Advance(); | |
555 // Prefix zero is only allowed if it's the only digit before | |
556 // a decimal point or exponent. | |
557 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
558 } else { | |
559 int i = 0; | |
560 int digits = 0; | |
561 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); | |
562 do { | |
563 i = i * 10 + c0_ - '0'; | |
564 digits++; | |
565 Advance(); | |
566 } while (IsDecimalDigit(c0_)); | |
// |i| is only used when fewer than 10 digits were read (at most 999999999).
567 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { | |
568 SkipWhitespace(); | |
569 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); | |
570 } | |
571 } | |
572 if (c0_ == '.') { | |
573 Advance(); | |
574 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
575 do { | |
576 Advance(); | |
577 } while (IsDecimalDigit(c0_)); | |
578 } | |
579 if (AsciiAlphaToLower(c0_) == 'e') { | |
580 Advance(); | |
581 if (c0_ == '-' || c0_ == '+') Advance(); | |
582 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
583 do { | |
584 Advance(); | |
585 } while (IsDecimalDigit(c0_)); | |
586 } | |
587 int length = position_ - beg_pos; | |
588 double number; | |
// NOTE(review): the two branches below pass different final arguments to
// StringToDouble (quiet NaN vs 0.0) - confirm whether this is intentional.
589 if (seq_one_byte) { | |
590 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length); | |
591 number = StringToDouble(isolate()->unicode_cache(), chars, | |
592 NO_FLAGS, // Hex, octal or trailing junk. | |
593 std::numeric_limits<double>::quiet_NaN()); | |
594 } else { | |
// Copy the number's characters into a temporary one-byte buffer, which is
// disposed again after the conversion.
595 Vector<uint8_t> buffer = Vector<uint8_t>::New(length); | |
596 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); | |
597 Vector<const uint8_t> result = | |
598 Vector<const uint8_t>(buffer.start(), length); | |
599 number = StringToDouble(isolate()->unicode_cache(), | |
600 result, | |
601 NO_FLAGS, // Hex, octal or trailing junk. | |
602 0.0); | |
603 buffer.Dispose(); | |
604 } | |
605 SkipWhitespace(); | |
606 return factory()->NewNumber(number, pretenure_); | |
607 } | |
608 | |
609 | |
// Writes character |c| at index |i| of |seq_str|. The primary template is only
// declared; the two specializations forward to the setter that matches the
// string's character width.
610 template <typename StringType> | |
611 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); | |
612 | |
613 template <> | |
614 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { | |
615 seq_str->SeqTwoByteStringSet(i, c); | |
616 } | |
617 | |
618 template <> | |
619 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) { | |
620 seq_str->SeqOneByteStringSet(i, c); | |
621 } | |
622 | |
// Allocates a raw sequential string of |length| characters of the requested
// type. The primary template is only declared; each specialization calls the
// matching factory method and unwraps the result with ToHandleChecked().
623 template <typename StringType> | |
624 inline Handle<StringType> NewRawString(Factory* factory, | |
625 int length, | |
626 PretenureFlag pretenure); | |
627 | |
628 template <> | |
629 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, | |
630 int length, | |
631 PretenureFlag pretenure) { | |
632 return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked(); | |
633 } | |
634 | |
635 template <> | |
636 inline Handle<SeqOneByteString> NewRawString(Factory* factory, | |
637 int length, | |
638 PretenureFlag pretenure) { | |
639 return factory->NewRawOneByteString(length, pretenure).ToHandleChecked(); | |
640 } | |
641 | |
642 | |
643 // Scans the rest of a JSON string starting from position_ and writes | |
644 // prefix[start..end] along with the scanned characters into a | |
645 // sequential string of type StringType. | |
// Returns a null handle for control characters, an unterminated string or an
// invalid escape. When the buffer fills up, or a character does not fit a
// one-byte sink, the function calls itself again with the characters written
// so far as the new prefix (and a two-byte sink where needed).
646 template <bool seq_one_byte> | |
647 template <typename StringType, typename SinkChar> | |
648 Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString( | |
649 Handle<String> prefix, int start, int end) { | |
// |count| is the number of characters written to the result so far.
650 int count = end - start; | |
// Upper bound on the result: the prefix plus everything left in the source.
651 int max_length = count + source_length_ - position_; | |
652 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); | |
653 Handle<StringType> seq_string = | |
654 NewRawString<StringType>(factory(), length, pretenure_); | |
655 // Copy prefix into seq_str. | |
656 SinkChar* dest = seq_string->GetChars(); | |
657 String::WriteToFlat(*prefix, dest, start, end); | |
658 | |
659 while (c0_ != '"') { | |
660 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
661 if (c0_ < 0x20) return Handle<String>::null(); | |
662 if (count >= length) { | |
663 // We need to create a longer sequential string for the result. | |
664 return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count); | |
665 } | |
666 if (c0_ != '\\') { | |
667 // If the sink can contain UC16 characters, or source_ contains only | |
668 // Latin1 characters, there's no need to test whether we can store the | |
669 // character. Otherwise check whether the UC16 source character can fit | |
670 // in the Latin1 sink. | |
671 if (sizeof(SinkChar) == kUC16Size || seq_one_byte || | |
672 c0_ <= String::kMaxOneByteCharCode) { | |
673 SeqStringSet(seq_string, count++, c0_); | |
674 Advance(); | |
675 } else { | |
676 // StringType is SeqOneByteString and we just read a non-Latin1 char. | |
677 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count); | |
678 } | |
679 } else { | |
680 Advance(); // Advance past the \. | |
681 switch (c0_) { | |
682 case '"': | |
683 case '\\': | |
684 case '/': | |
685 SeqStringSet(seq_string, count++, c0_); | |
686 break; | |
687 case 'b': | |
688 SeqStringSet(seq_string, count++, '\x08'); | |
689 break; | |
690 case 'f': | |
691 SeqStringSet(seq_string, count++, '\x0c'); | |
692 break; | |
693 case 'n': | |
694 SeqStringSet(seq_string, count++, '\x0a'); | |
695 break; | |
696 case 'r': | |
697 SeqStringSet(seq_string, count++, '\x0d'); | |
698 break; | |
699 case 't': | |
700 SeqStringSet(seq_string, count++, '\x09'); | |
701 break; | |
702 case 'u': { | |
// Accumulate exactly four hex digits; any non-hex character is an error.
703 uc32 value = 0; | |
704 for (int i = 0; i < 4; i++) { | |
705 Advance(); | |
706 int digit = HexValue(c0_); | |
707 if (digit < 0) { | |
708 return Handle<String>::null(); | |
709 } | |
710 value = value * 16 + digit; | |
711 } | |
712 if (sizeof(SinkChar) == kUC16Size || | |
713 value <= String::kMaxOneByteCharCode) { | |
714 SeqStringSet(seq_string, count++, value); | |
715 break; | |
716 } else { | |
717 // StringType is SeqOneByteString and we just read a non-Latin1 | |
718 // char. | |
// position_ is on the last hex digit here; going back 6 and advancing once
// puts c0_ on the backslash so the two-byte rescan re-reads the escape.
719 position_ -= 6; // Rewind position_ to \ in \uxxxx. | |
720 Advance(); | |
721 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, | |
722 0, | |
723 count); | |
724 } | |
725 } | |
726 default: | |
727 return Handle<String>::null(); | |
728 } | |
729 Advance(); | |
730 } | |
731 } | |
732 | |
733 DCHECK_EQ('"', c0_); | |
734 // Advance past the last '"'. | |
735 AdvanceSkipWhitespace(); | |
736 | |
737 // Shrink seq_string length to count and return. | |
738 return SeqString::Truncate(seq_string, count); | |
739 } | |
740 | |
741 | |
// Scans a JSON string; c0_ must be the opening '"'. Returns a null handle for
// control characters or an unterminated string.
// Three paths:
//  - an empty string returns the factory's empty string;
//  - for one-byte sources with is_internalized set, the string is hashed while
//    scanning and looked up directly in the string table;
//  - otherwise the characters are scanned in place and copied into a new
//    one-byte string, switching to SlowScanJsonString at the first backslash or
//    non-Latin1 character.
742 template <bool seq_one_byte> | |
743 template <bool is_internalized> | |
744 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() { | |
745 DCHECK_EQ('"', c0_); | |
746 Advance(); | |
747 if (c0_ == '"') { | |
748 AdvanceSkipWhitespace(); | |
749 return factory()->empty_string(); | |
750 } | |
751 | |
752 if (seq_one_byte && is_internalized) { | |
753 // Fast path for existing internalized strings. If the string being | |
754 // parsed is not a known internalized string, contains backslashes or | |
755 // unexpectedly reaches the end of string, return with an empty handle. | |
756 uint32_t running_hash = isolate()->heap()->HashSeed(); | |
// Scan with local copies so position_ / c0_ still mark the string start.
757 int position = position_; | |
758 uc32 c0 = c0_; | |
759 do { | |
760 if (c0 == '\\') { | |
// Hand over to the slow scanner: [beg_pos, position) becomes the prefix and
// scanning resumes at the backslash.
761 c0_ = c0; | |
762 int beg_pos = position_; | |
763 position_ = position; | |
764 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, | |
765 beg_pos, | |
766 position_); | |
767 } | |
768 if (c0 < 0x20) return Handle<String>::null(); | |
769 running_hash = StringHasher::AddCharacterCore(running_hash, | |
770 static_cast<uint16_t>(c0)); | |
771 position++; | |
772 if (position >= source_length_) return Handle<String>::null(); | |
773 c0 = seq_source_->SeqOneByteStringGet(position); | |
774 } while (c0 != '"'); | |
775 int length = position - position_; | |
// Strings longer than kMaxHashCalcLength use their length as the hash value.
776 uint32_t hash = (length <= String::kMaxHashCalcLength) | |
777 ? StringHasher::GetHashCore(running_hash) | |
778 : static_cast<uint32_t>(length); | |
779 Vector<const uint8_t> string_vector( | |
780 seq_source_->GetChars() + position_, length); | |
781 StringTable* string_table = isolate()->heap()->string_table(); | |
782 uint32_t capacity = string_table->Capacity(); | |
783 uint32_t entry = StringTable::FirstProbe(hash, capacity); | |
784 uint32_t count = 1; | |
785 Handle<String> result; | |
// Probe the string table by hand: an undefined slot ends the search (the
// string is then internalized from the source), hole slots are skipped.
786 while (true) { | |
787 Object* element = string_table->KeyAt(entry); | |
788 if (element == isolate()->heap()->undefined_value()) { | |
789 // Lookup failure. | |
790 result = factory()->InternalizeOneByteString( | |
791 seq_source_, position_, length); | |
792 break; | |
793 } | |
794 if (element != isolate()->heap()->the_hole_value() && | |
795 String::cast(element)->IsOneByteEqualTo(string_vector)) { | |
796 result = Handle<String>(String::cast(element), isolate()); | |
797 #ifdef DEBUG | |
798 uint32_t hash_field = | |
799 (hash << String::kHashShift) | String::kIsNotArrayIndexMask; | |
800 DCHECK_EQ(static_cast<int>(result->Hash()), | |
801 static_cast<int>(hash_field >> String::kHashShift)); | |
802 #endif | |
803 break; | |
804 } | |
805 entry = StringTable::NextProbe(entry, count++, capacity); | |
806 } | |
807 position_ = position; | |
808 // Advance past the last '"'. | |
809 AdvanceSkipWhitespace(); | |
810 return result; | |
811 } | |
812 | |
813 int beg_pos = position_; | |
814 // Fast case for Latin1 only without escape characters. | |
815 do { | |
816 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
817 if (c0_ < 0x20) return Handle<String>::null(); | |
818 if (c0_ != '\\') { | |
819 if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) { | |
820 Advance(); | |
821 } else { | |
822 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, | |
823 beg_pos, | |
824 position_); | |
825 } | |
826 } else { | |
827 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, | |
828 beg_pos, | |
829 position_); | |
830 } | |
831 } while (c0_ != '"'); | |
// No escapes and only one-byte characters: copy the span verbatim.
832 int length = position_ - beg_pos; | |
833 Handle<String> result = | |
834 factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked(); | |
835 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(); | |
836 String::WriteToFlat(*source_, dest, beg_pos, position_); | |
837 | |
838 DCHECK_EQ('"', c0_); | |
839 // Advance past the last '"'. | |
840 AdvanceSkipWhitespace(); | |
841 return result; | |
842 } | |
843 | |
844 } // namespace internal | 136 } // namespace internal |
845 } // namespace v8 | 137 } // namespace v8 |
846 | 138 |
847 #endif // V8_JSON_PARSER_H_ | 139 #endif // V8_JSON_PARSER_H_ |
OLD | NEW |