OLD | NEW |
| (Empty) |
1 // Copyright 2011 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #ifndef V8_JSON_PARSER_H_ | |
6 #define V8_JSON_PARSER_H_ | |
7 | |
8 #include "src/char-predicates.h" | |
9 #include "src/conversions.h" | |
10 #include "src/debug/debug.h" | |
11 #include "src/factory.h" | |
12 #include "src/messages.h" | |
13 #include "src/scanner.h" | |
14 #include "src/token.h" | |
15 #include "src/transitions.h" | |
16 #include "src/types.h" | |
17 | |
18 namespace v8 { | |
19 namespace internal { | |
20 | |
// Outcome of JsonParser::ParseElement.
enum ParseElementResult {
  // The key was an array index and its value was stored as an element.
  kElementFound,
  // The key did not turn out to be an array index followed by ':'.
  kElementNotFound,
  // Parsing the element's value produced a null handle.
  kNullHandle
};
22 | |
23 | |
24 // A simple json parser. | |
25 template <bool seq_one_byte> | |
26 class JsonParser BASE_EMBEDDED { | |
27 public: | |
28 MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) { | |
29 return JsonParser(source).ParseJson(); | |
30 } | |
31 | |
32 static const int kEndOfString = -1; | |
33 | |
34 private: | |
35 explicit JsonParser(Handle<String> source) | |
36 : source_(source), | |
37 source_length_(source->length()), | |
38 isolate_(source->map()->GetHeap()->isolate()), | |
39 factory_(isolate_->factory()), | |
40 object_constructor_(isolate_->native_context()->object_function(), | |
41 isolate_), | |
42 position_(-1) { | |
43 source_ = String::Flatten(source_); | |
44 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED; | |
45 | |
46 // Optimized fast case where we only have Latin1 characters. | |
47 if (seq_one_byte) { | |
48 seq_source_ = Handle<SeqOneByteString>::cast(source_); | |
49 } | |
50 } | |
51 | |
52 // Parse a string containing a single JSON value. | |
53 MaybeHandle<Object> ParseJson(); | |
54 | |
55 inline void Advance() { | |
56 position_++; | |
57 if (position_ >= source_length_) { | |
58 c0_ = kEndOfString; | |
59 } else if (seq_one_byte) { | |
60 c0_ = seq_source_->SeqOneByteStringGet(position_); | |
61 } else { | |
62 c0_ = source_->Get(position_); | |
63 } | |
64 } | |
65 | |
66 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | |
67 // section 15.12.1.1. The only allowed whitespace characters between tokens | |
68 // are tab, carriage-return, newline and space. | |
69 | |
70 inline void AdvanceSkipWhitespace() { | |
71 do { | |
72 Advance(); | |
73 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r'); | |
74 } | |
75 | |
76 inline void SkipWhitespace() { | |
77 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') { | |
78 Advance(); | |
79 } | |
80 } | |
81 | |
82 inline uc32 AdvanceGetChar() { | |
83 Advance(); | |
84 return c0_; | |
85 } | |
86 | |
87 // Checks that current charater is c. | |
88 // If so, then consume c and skip whitespace. | |
89 inline bool MatchSkipWhiteSpace(uc32 c) { | |
90 if (c0_ == c) { | |
91 AdvanceSkipWhitespace(); | |
92 return true; | |
93 } | |
94 return false; | |
95 } | |
96 | |
97 // A JSON string (production JSONString) is subset of valid JavaScript string | |
98 // literals. The string must only be double-quoted (not single-quoted), and | |
99 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | |
100 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | |
101 Handle<String> ParseJsonString() { | |
102 return ScanJsonString<false>(); | |
103 } | |
104 | |
105 bool ParseJsonString(Handle<String> expected) { | |
106 int length = expected->length(); | |
107 if (source_->length() - position_ - 1 > length) { | |
108 DisallowHeapAllocation no_gc; | |
109 String::FlatContent content = expected->GetFlatContent(); | |
110 if (content.IsOneByte()) { | |
111 DCHECK_EQ('"', c0_); | |
112 const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1; | |
113 const uint8_t* expected_chars = content.ToOneByteVector().start(); | |
114 for (int i = 0; i < length; i++) { | |
115 uint8_t c0 = input_chars[i]; | |
116 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') { | |
117 return false; | |
118 } | |
119 } | |
120 if (input_chars[length] == '"') { | |
121 position_ = position_ + length + 1; | |
122 AdvanceSkipWhitespace(); | |
123 return true; | |
124 } | |
125 } | |
126 } | |
127 return false; | |
128 } | |
129 | |
130 Handle<String> ParseJsonInternalizedString() { | |
131 return ScanJsonString<true>(); | |
132 } | |
133 | |
134 template <bool is_internalized> | |
135 Handle<String> ScanJsonString(); | |
136 // Creates a new string and copies prefix[start..end] into the beginning | |
137 // of it. Then scans the rest of the string, adding characters after the | |
138 // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char. | |
139 template <typename StringType, typename SinkChar> | |
140 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); | |
141 | |
142 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | |
143 // decimal number literals. | |
144 // It includes an optional minus sign, must have at least one | |
145 // digit before and after a decimal point, may not have prefixed zeros (unless | |
146 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | |
147 // Hexadecimal and octal numbers are not allowed. | |
148 Handle<Object> ParseJsonNumber(); | |
149 | |
150 // Parse a single JSON value from input (grammar production JSONValue). | |
151 // A JSON value is either a (double-quoted) string literal, a number literal, | |
152 // one of "true", "false", or "null", or an object or array literal. | |
153 Handle<Object> ParseJsonValue(); | |
154 | |
155 // Parse a JSON object literal (grammar production JSONObject). | |
156 // An object literal is a squiggly-braced and comma separated sequence | |
157 // (possibly empty) of key/value pairs, where the key is a JSON string | |
158 // literal, the value is a JSON value, and the two are separated by a colon. | |
159 // A JSON array doesn't allow numbers and identifiers as keys, like a | |
160 // JavaScript array. | |
161 Handle<Object> ParseJsonObject(); | |
162 | |
163 // Helper for ParseJsonObject. Parses the form "123": obj, which is recorded | |
164 // as an element, not a property. | |
165 ParseElementResult ParseElement(Handle<JSObject> json_object); | |
166 | |
167 // Parses a JSON array literal (grammar production JSONArray). An array | |
168 // literal is a square-bracketed and comma separated sequence (possibly empty) | |
169 // of JSON values. | |
170 // A JSON array doesn't allow leaving out values from the sequence, nor does | |
171 // it allow a terminal comma, like a JavaScript array does. | |
172 Handle<Object> ParseJsonArray(); | |
173 | |
174 | |
175 // Mark that a parsing error has happened at the current token, and | |
176 // return a null handle. Primarily for readability. | |
177 inline Handle<Object> ReportUnexpectedCharacter() { | |
178 return Handle<Object>::null(); | |
179 } | |
180 | |
181 inline Isolate* isolate() { return isolate_; } | |
182 inline Factory* factory() { return factory_; } | |
183 inline Handle<JSFunction> object_constructor() { return object_constructor_; } | |
184 | |
185 static const int kInitialSpecialStringLength = 32; | |
186 static const int kPretenureTreshold = 100 * 1024; | |
187 | |
188 | |
189 private: | |
190 Zone* zone() { return &zone_; } | |
191 | |
192 void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map, | |
193 ZoneList<Handle<Object> >* properties); | |
194 | |
195 Handle<String> source_; | |
196 int source_length_; | |
197 Handle<SeqOneByteString> seq_source_; | |
198 | |
199 PretenureFlag pretenure_; | |
200 Isolate* isolate_; | |
201 Factory* factory_; | |
202 Zone zone_; | |
203 Handle<JSFunction> object_constructor_; | |
204 uc32 c0_; | |
205 int position_; | |
206 }; | |
207 | |
208 template <bool seq_one_byte> | |
209 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() { | |
210 // Advance to the first character (possibly EOS) | |
211 AdvanceSkipWhitespace(); | |
212 Handle<Object> result = ParseJsonValue(); | |
213 if (result.is_null() || c0_ != kEndOfString) { | |
214 // Some exception (for example stack overflow) is already pending. | |
215 if (isolate_->has_pending_exception()) return Handle<Object>::null(); | |
216 | |
217 // Parse failed. Current character is the unexpected token. | |
218 Factory* factory = this->factory(); | |
219 MessageTemplate::Template message; | |
220 Handle<String> argument; | |
221 | |
222 switch (c0_) { | |
223 case kEndOfString: | |
224 message = MessageTemplate::kUnexpectedEOS; | |
225 break; | |
226 case '-': | |
227 case '0': | |
228 case '1': | |
229 case '2': | |
230 case '3': | |
231 case '4': | |
232 case '5': | |
233 case '6': | |
234 case '7': | |
235 case '8': | |
236 case '9': | |
237 message = MessageTemplate::kUnexpectedTokenNumber; | |
238 break; | |
239 case '"': | |
240 message = MessageTemplate::kUnexpectedTokenString; | |
241 break; | |
242 default: | |
243 message = MessageTemplate::kUnexpectedToken; | |
244 argument = factory->LookupSingleCharacterStringFromCode(c0_); | |
245 break; | |
246 } | |
247 | |
248 Handle<Script> script(factory->NewScript(source_)); | |
249 // We should sent compile error event because we compile JSON object in | |
250 // separated source file. | |
251 isolate()->debug()->OnCompileError(script); | |
252 MessageLocation location(script, position_, position_ + 1); | |
253 Handle<Object> error = factory->NewSyntaxError(message, argument); | |
254 return isolate()->template Throw<Object>(error, &location); | |
255 } | |
256 return result; | |
257 } | |
258 | |
259 | |
260 // Parse any JSON value. | |
261 template <bool seq_one_byte> | |
262 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() { | |
263 StackLimitCheck stack_check(isolate_); | |
264 if (stack_check.HasOverflowed()) { | |
265 isolate_->StackOverflow(); | |
266 return Handle<Object>::null(); | |
267 } | |
268 | |
269 if (stack_check.InterruptRequested()) { | |
270 ExecutionAccess access(isolate_); | |
271 // Avoid blocking GC in long running parser (v8:3974). | |
272 isolate_->stack_guard()->HandleGCInterrupt(); | |
273 } | |
274 | |
275 if (c0_ == '"') return ParseJsonString(); | |
276 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber(); | |
277 if (c0_ == '{') return ParseJsonObject(); | |
278 if (c0_ == '[') return ParseJsonArray(); | |
279 if (c0_ == 'f') { | |
280 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && | |
281 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { | |
282 AdvanceSkipWhitespace(); | |
283 return factory()->false_value(); | |
284 } | |
285 return ReportUnexpectedCharacter(); | |
286 } | |
287 if (c0_ == 't') { | |
288 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && | |
289 AdvanceGetChar() == 'e') { | |
290 AdvanceSkipWhitespace(); | |
291 return factory()->true_value(); | |
292 } | |
293 return ReportUnexpectedCharacter(); | |
294 } | |
295 if (c0_ == 'n') { | |
296 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && | |
297 AdvanceGetChar() == 'l') { | |
298 AdvanceSkipWhitespace(); | |
299 return factory()->null_value(); | |
300 } | |
301 return ReportUnexpectedCharacter(); | |
302 } | |
303 return ReportUnexpectedCharacter(); | |
304 } | |
305 | |
306 | |
307 template <bool seq_one_byte> | |
308 ParseElementResult JsonParser<seq_one_byte>::ParseElement( | |
309 Handle<JSObject> json_object) { | |
310 uint32_t index = 0; | |
311 // Maybe an array index, try to parse it. | |
312 if (c0_ == '0') { | |
313 // With a leading zero, the string has to be "0" only to be an index. | |
314 Advance(); | |
315 } else { | |
316 do { | |
317 int d = c0_ - '0'; | |
318 if (index > 429496729U - ((d + 3) >> 3)) break; | |
319 index = (index * 10) + d; | |
320 Advance(); | |
321 } while (IsDecimalDigit(c0_)); | |
322 } | |
323 | |
324 if (c0_ == '"') { | |
325 // Successfully parsed index, parse and store element. | |
326 AdvanceSkipWhitespace(); | |
327 | |
328 if (c0_ == ':') { | |
329 AdvanceSkipWhitespace(); | |
330 Handle<Object> value = ParseJsonValue(); | |
331 if (!value.is_null()) { | |
332 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE) | |
333 .Assert(); | |
334 return kElementFound; | |
335 } else { | |
336 return kNullHandle; | |
337 } | |
338 } | |
339 } | |
340 return kElementNotFound; | |
341 } | |
342 | |
343 // Parse a JSON object. Position must be right at '{'. | |
344 template <bool seq_one_byte> | |
345 Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() { | |
346 HandleScope scope(isolate()); | |
347 Handle<JSObject> json_object = | |
348 factory()->NewJSObject(object_constructor(), pretenure_); | |
349 Handle<Map> map(json_object->map()); | |
350 int descriptor = 0; | |
351 ZoneList<Handle<Object> > properties(8, zone()); | |
352 DCHECK_EQ(c0_, '{'); | |
353 | |
354 bool transitioning = true; | |
355 | |
356 AdvanceSkipWhitespace(); | |
357 if (c0_ != '}') { | |
358 do { | |
359 if (c0_ != '"') return ReportUnexpectedCharacter(); | |
360 | |
361 int start_position = position_; | |
362 Advance(); | |
363 | |
364 if (IsDecimalDigit(c0_)) { | |
365 ParseElementResult element_result = ParseElement(json_object); | |
366 if (element_result == kNullHandle) return Handle<Object>::null(); | |
367 if (element_result == kElementFound) continue; | |
368 } | |
369 // Not an index, fallback to the slow path. | |
370 | |
371 position_ = start_position; | |
372 #ifdef DEBUG | |
373 c0_ = '"'; | |
374 #endif | |
375 | |
376 Handle<String> key; | |
377 Handle<Object> value; | |
378 | |
379 // Try to follow existing transitions as long as possible. Once we stop | |
380 // transitioning, no transition can be found anymore. | |
381 DCHECK(transitioning); | |
382 // First check whether there is a single expected transition. If so, try | |
383 // to parse it first. | |
384 bool follow_expected = false; | |
385 Handle<Map> target; | |
386 if (seq_one_byte) { | |
387 key = TransitionArray::ExpectedTransitionKey(map); | |
388 follow_expected = !key.is_null() && ParseJsonString(key); | |
389 } | |
390 // If the expected transition hits, follow it. | |
391 if (follow_expected) { | |
392 target = TransitionArray::ExpectedTransitionTarget(map); | |
393 } else { | |
394 // If the expected transition failed, parse an internalized string and | |
395 // try to find a matching transition. | |
396 key = ParseJsonInternalizedString(); | |
397 if (key.is_null()) return ReportUnexpectedCharacter(); | |
398 | |
399 target = TransitionArray::FindTransitionToField(map, key); | |
400 // If a transition was found, follow it and continue. | |
401 transitioning = !target.is_null(); | |
402 } | |
403 if (c0_ != ':') return ReportUnexpectedCharacter(); | |
404 | |
405 AdvanceSkipWhitespace(); | |
406 value = ParseJsonValue(); | |
407 if (value.is_null()) return ReportUnexpectedCharacter(); | |
408 | |
409 if (transitioning) { | |
410 PropertyDetails details = | |
411 target->instance_descriptors()->GetDetails(descriptor); | |
412 Representation expected_representation = details.representation(); | |
413 | |
414 if (value->FitsRepresentation(expected_representation)) { | |
415 if (expected_representation.IsHeapObject() && | |
416 !target->instance_descriptors() | |
417 ->GetFieldType(descriptor) | |
418 ->NowContains(value)) { | |
419 Handle<HeapType> value_type( | |
420 value->OptimalType(isolate(), expected_representation)); | |
421 Map::GeneralizeFieldType(target, descriptor, | |
422 expected_representation, value_type); | |
423 } | |
424 DCHECK(target->instance_descriptors() | |
425 ->GetFieldType(descriptor) | |
426 ->NowContains(value)); | |
427 properties.Add(value, zone()); | |
428 map = target; | |
429 descriptor++; | |
430 continue; | |
431 } else { | |
432 transitioning = false; | |
433 } | |
434 } | |
435 | |
436 DCHECK(!transitioning); | |
437 | |
438 // Commit the intermediate state to the object and stop transitioning. | |
439 CommitStateToJsonObject(json_object, map, &properties); | |
440 | |
441 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value) | |
442 .Check(); | |
443 } while (transitioning && MatchSkipWhiteSpace(',')); | |
444 | |
445 // If we transitioned until the very end, transition the map now. | |
446 if (transitioning) { | |
447 CommitStateToJsonObject(json_object, map, &properties); | |
448 } else { | |
449 while (MatchSkipWhiteSpace(',')) { | |
450 HandleScope local_scope(isolate()); | |
451 if (c0_ != '"') return ReportUnexpectedCharacter(); | |
452 | |
453 int start_position = position_; | |
454 Advance(); | |
455 | |
456 if (IsDecimalDigit(c0_)) { | |
457 ParseElementResult element_result = ParseElement(json_object); | |
458 if (element_result == kNullHandle) return Handle<Object>::null(); | |
459 if (element_result == kElementFound) continue; | |
460 } | |
461 // Not an index, fallback to the slow path. | |
462 | |
463 position_ = start_position; | |
464 #ifdef DEBUG | |
465 c0_ = '"'; | |
466 #endif | |
467 | |
468 Handle<String> key; | |
469 Handle<Object> value; | |
470 | |
471 key = ParseJsonInternalizedString(); | |
472 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); | |
473 | |
474 AdvanceSkipWhitespace(); | |
475 value = ParseJsonValue(); | |
476 if (value.is_null()) return ReportUnexpectedCharacter(); | |
477 | |
478 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, | |
479 value).Check(); | |
480 } | |
481 } | |
482 | |
483 if (c0_ != '}') { | |
484 return ReportUnexpectedCharacter(); | |
485 } | |
486 } | |
487 AdvanceSkipWhitespace(); | |
488 return scope.CloseAndEscape(json_object); | |
489 } | |
490 | |
491 | |
492 template <bool seq_one_byte> | |
493 void JsonParser<seq_one_byte>::CommitStateToJsonObject( | |
494 Handle<JSObject> json_object, Handle<Map> map, | |
495 ZoneList<Handle<Object> >* properties) { | |
496 JSObject::AllocateStorageForMap(json_object, map); | |
497 DCHECK(!json_object->map()->is_dictionary_map()); | |
498 | |
499 DisallowHeapAllocation no_gc; | |
500 | |
501 int length = properties->length(); | |
502 for (int i = 0; i < length; i++) { | |
503 Handle<Object> value = (*properties)[i]; | |
504 json_object->WriteToField(i, *value); | |
505 } | |
506 } | |
507 | |
508 | |
509 // Parse a JSON array. Position must be right at '['. | |
510 template <bool seq_one_byte> | |
511 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() { | |
512 HandleScope scope(isolate()); | |
513 ZoneList<Handle<Object> > elements(4, zone()); | |
514 DCHECK_EQ(c0_, '['); | |
515 | |
516 AdvanceSkipWhitespace(); | |
517 if (c0_ != ']') { | |
518 do { | |
519 Handle<Object> element = ParseJsonValue(); | |
520 if (element.is_null()) return ReportUnexpectedCharacter(); | |
521 elements.Add(element, zone()); | |
522 } while (MatchSkipWhiteSpace(',')); | |
523 if (c0_ != ']') { | |
524 return ReportUnexpectedCharacter(); | |
525 } | |
526 } | |
527 AdvanceSkipWhitespace(); | |
528 // Allocate a fixed array with all the elements. | |
529 Handle<FixedArray> fast_elements = | |
530 factory()->NewFixedArray(elements.length(), pretenure_); | |
531 for (int i = 0, n = elements.length(); i < n; i++) { | |
532 fast_elements->set(i, *elements[i]); | |
533 } | |
534 Handle<Object> json_array = factory()->NewJSArrayWithElements( | |
535 fast_elements, FAST_ELEMENTS, Strength::WEAK, pretenure_); | |
536 return scope.CloseAndEscape(json_array); | |
537 } | |
538 | |
539 | |
540 template <bool seq_one_byte> | |
541 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() { | |
542 bool negative = false; | |
543 int beg_pos = position_; | |
544 if (c0_ == '-') { | |
545 Advance(); | |
546 negative = true; | |
547 } | |
548 if (c0_ == '0') { | |
549 Advance(); | |
550 // Prefix zero is only allowed if it's the only digit before | |
551 // a decimal point or exponent. | |
552 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
553 } else { | |
554 int i = 0; | |
555 int digits = 0; | |
556 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); | |
557 do { | |
558 i = i * 10 + c0_ - '0'; | |
559 digits++; | |
560 Advance(); | |
561 } while (IsDecimalDigit(c0_)); | |
562 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { | |
563 SkipWhitespace(); | |
564 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); | |
565 } | |
566 } | |
567 if (c0_ == '.') { | |
568 Advance(); | |
569 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
570 do { | |
571 Advance(); | |
572 } while (IsDecimalDigit(c0_)); | |
573 } | |
574 if (AsciiAlphaToLower(c0_) == 'e') { | |
575 Advance(); | |
576 if (c0_ == '-' || c0_ == '+') Advance(); | |
577 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); | |
578 do { | |
579 Advance(); | |
580 } while (IsDecimalDigit(c0_)); | |
581 } | |
582 int length = position_ - beg_pos; | |
583 double number; | |
584 if (seq_one_byte) { | |
585 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length); | |
586 number = StringToDouble(isolate()->unicode_cache(), chars, | |
587 NO_FLAGS, // Hex, octal or trailing junk. | |
588 std::numeric_limits<double>::quiet_NaN()); | |
589 } else { | |
590 Vector<uint8_t> buffer = Vector<uint8_t>::New(length); | |
591 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); | |
592 Vector<const uint8_t> result = | |
593 Vector<const uint8_t>(buffer.start(), length); | |
594 number = StringToDouble(isolate()->unicode_cache(), | |
595 result, | |
596 NO_FLAGS, // Hex, octal or trailing junk. | |
597 0.0); | |
598 buffer.Dispose(); | |
599 } | |
600 SkipWhitespace(); | |
601 return factory()->NewNumber(number, pretenure_); | |
602 } | |
603 | |
604 | |
605 template <typename StringType> | |
606 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); | |
607 | |
608 template <> | |
609 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { | |
610 seq_str->SeqTwoByteStringSet(i, c); | |
611 } | |
612 | |
613 template <> | |
614 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) { | |
615 seq_str->SeqOneByteStringSet(i, c); | |
616 } | |
617 | |
618 template <typename StringType> | |
619 inline Handle<StringType> NewRawString(Factory* factory, | |
620 int length, | |
621 PretenureFlag pretenure); | |
622 | |
623 template <> | |
624 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, | |
625 int length, | |
626 PretenureFlag pretenure) { | |
627 return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked(); | |
628 } | |
629 | |
630 template <> | |
631 inline Handle<SeqOneByteString> NewRawString(Factory* factory, | |
632 int length, | |
633 PretenureFlag pretenure) { | |
634 return factory->NewRawOneByteString(length, pretenure).ToHandleChecked(); | |
635 } | |
636 | |
637 | |
638 // Scans the rest of a JSON string starting from position_ and writes | |
639 // prefix[start..end] along with the scanned characters into a | |
640 // sequential string of type StringType. | |
641 template <bool seq_one_byte> | |
642 template <typename StringType, typename SinkChar> | |
643 Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString( | |
644 Handle<String> prefix, int start, int end) { | |
645 int count = end - start; | |
646 int max_length = count + source_length_ - position_; | |
647 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); | |
648 Handle<StringType> seq_string = | |
649 NewRawString<StringType>(factory(), length, pretenure_); | |
650 // Copy prefix into seq_str. | |
651 SinkChar* dest = seq_string->GetChars(); | |
652 String::WriteToFlat(*prefix, dest, start, end); | |
653 | |
654 while (c0_ != '"') { | |
655 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
656 if (c0_ < 0x20) return Handle<String>::null(); | |
657 if (count >= length) { | |
658 // We need to create a longer sequential string for the result. | |
659 return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count); | |
660 } | |
661 if (c0_ != '\\') { | |
662 // If the sink can contain UC16 characters, or source_ contains only | |
663 // Latin1 characters, there's no need to test whether we can store the | |
664 // character. Otherwise check whether the UC16 source character can fit | |
665 // in the Latin1 sink. | |
666 if (sizeof(SinkChar) == kUC16Size || seq_one_byte || | |
667 c0_ <= String::kMaxOneByteCharCode) { | |
668 SeqStringSet(seq_string, count++, c0_); | |
669 Advance(); | |
670 } else { | |
671 // StringType is SeqOneByteString and we just read a non-Latin1 char. | |
672 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count); | |
673 } | |
674 } else { | |
675 Advance(); // Advance past the \. | |
676 switch (c0_) { | |
677 case '"': | |
678 case '\\': | |
679 case '/': | |
680 SeqStringSet(seq_string, count++, c0_); | |
681 break; | |
682 case 'b': | |
683 SeqStringSet(seq_string, count++, '\x08'); | |
684 break; | |
685 case 'f': | |
686 SeqStringSet(seq_string, count++, '\x0c'); | |
687 break; | |
688 case 'n': | |
689 SeqStringSet(seq_string, count++, '\x0a'); | |
690 break; | |
691 case 'r': | |
692 SeqStringSet(seq_string, count++, '\x0d'); | |
693 break; | |
694 case 't': | |
695 SeqStringSet(seq_string, count++, '\x09'); | |
696 break; | |
697 case 'u': { | |
698 uc32 value = 0; | |
699 for (int i = 0; i < 4; i++) { | |
700 Advance(); | |
701 int digit = HexValue(c0_); | |
702 if (digit < 0) { | |
703 return Handle<String>::null(); | |
704 } | |
705 value = value * 16 + digit; | |
706 } | |
707 if (sizeof(SinkChar) == kUC16Size || | |
708 value <= String::kMaxOneByteCharCode) { | |
709 SeqStringSet(seq_string, count++, value); | |
710 break; | |
711 } else { | |
712 // StringType is SeqOneByteString and we just read a non-Latin1 | |
713 // char. | |
714 position_ -= 6; // Rewind position_ to \ in \uxxxx. | |
715 Advance(); | |
716 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, | |
717 0, | |
718 count); | |
719 } | |
720 } | |
721 default: | |
722 return Handle<String>::null(); | |
723 } | |
724 Advance(); | |
725 } | |
726 } | |
727 | |
728 DCHECK_EQ('"', c0_); | |
729 // Advance past the last '"'. | |
730 AdvanceSkipWhitespace(); | |
731 | |
732 // Shrink seq_string length to count and return. | |
733 return SeqString::Truncate(seq_string, count); | |
734 } | |
735 | |
736 | |
737 template <bool seq_one_byte> | |
738 template <bool is_internalized> | |
739 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() { | |
740 DCHECK_EQ('"', c0_); | |
741 Advance(); | |
742 if (c0_ == '"') { | |
743 AdvanceSkipWhitespace(); | |
744 return factory()->empty_string(); | |
745 } | |
746 | |
747 if (seq_one_byte && is_internalized) { | |
748 // Fast path for existing internalized strings. If the the string being | |
749 // parsed is not a known internalized string, contains backslashes or | |
750 // unexpectedly reaches the end of string, return with an empty handle. | |
751 uint32_t running_hash = isolate()->heap()->HashSeed(); | |
752 int position = position_; | |
753 uc32 c0 = c0_; | |
754 do { | |
755 if (c0 == '\\') { | |
756 c0_ = c0; | |
757 int beg_pos = position_; | |
758 position_ = position; | |
759 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, | |
760 beg_pos, | |
761 position_); | |
762 } | |
763 if (c0 < 0x20) return Handle<String>::null(); | |
764 if (static_cast<uint32_t>(c0) > | |
765 unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
766 running_hash = | |
767 StringHasher::AddCharacterCore(running_hash, | |
768 unibrow::Utf16::LeadSurrogate(c0)); | |
769 running_hash = | |
770 StringHasher::AddCharacterCore(running_hash, | |
771 unibrow::Utf16::TrailSurrogate(c0)); | |
772 } else { | |
773 running_hash = StringHasher::AddCharacterCore(running_hash, c0); | |
774 } | |
775 position++; | |
776 if (position >= source_length_) return Handle<String>::null(); | |
777 c0 = seq_source_->SeqOneByteStringGet(position); | |
778 } while (c0 != '"'); | |
779 int length = position - position_; | |
780 uint32_t hash = (length <= String::kMaxHashCalcLength) | |
781 ? StringHasher::GetHashCore(running_hash) | |
782 : static_cast<uint32_t>(length); | |
783 Vector<const uint8_t> string_vector( | |
784 seq_source_->GetChars() + position_, length); | |
785 StringTable* string_table = isolate()->heap()->string_table(); | |
786 uint32_t capacity = string_table->Capacity(); | |
787 uint32_t entry = StringTable::FirstProbe(hash, capacity); | |
788 uint32_t count = 1; | |
789 Handle<String> result; | |
790 while (true) { | |
791 Object* element = string_table->KeyAt(entry); | |
792 if (element == isolate()->heap()->undefined_value()) { | |
793 // Lookup failure. | |
794 result = factory()->InternalizeOneByteString( | |
795 seq_source_, position_, length); | |
796 break; | |
797 } | |
798 if (element != isolate()->heap()->the_hole_value() && | |
799 String::cast(element)->IsOneByteEqualTo(string_vector)) { | |
800 result = Handle<String>(String::cast(element), isolate()); | |
801 #ifdef DEBUG | |
802 uint32_t hash_field = | |
803 (hash << String::kHashShift) | String::kIsNotArrayIndexMask; | |
804 DCHECK_EQ(static_cast<int>(result->Hash()), | |
805 static_cast<int>(hash_field >> String::kHashShift)); | |
806 #endif | |
807 break; | |
808 } | |
809 entry = StringTable::NextProbe(entry, count++, capacity); | |
810 } | |
811 position_ = position; | |
812 // Advance past the last '"'. | |
813 AdvanceSkipWhitespace(); | |
814 return result; | |
815 } | |
816 | |
817 int beg_pos = position_; | |
818 // Fast case for Latin1 only without escape characters. | |
819 do { | |
820 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
821 if (c0_ < 0x20) return Handle<String>::null(); | |
822 if (c0_ != '\\') { | |
823 if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) { | |
824 Advance(); | |
825 } else { | |
826 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, | |
827 beg_pos, | |
828 position_); | |
829 } | |
830 } else { | |
831 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, | |
832 beg_pos, | |
833 position_); | |
834 } | |
835 } while (c0_ != '"'); | |
836 int length = position_ - beg_pos; | |
837 Handle<String> result = | |
838 factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked(); | |
839 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(); | |
840 String::WriteToFlat(*source_, dest, beg_pos, position_); | |
841 | |
842 DCHECK_EQ('"', c0_); | |
843 // Advance past the last '"'. | |
844 AdvanceSkipWhitespace(); | |
845 return result; | |
846 } | |
847 | |
848 } // namespace internal | |
849 } // namespace v8 | |
850 | |
851 #endif // V8_JSON_PARSER_H_ | |
OLD | NEW |