OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2011 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 #include "v8.h" | |
29 | |
30 #include "char-predicates-inl.h" | |
31 #include "conversions.h" | |
32 #include "json-parser.h" | |
33 #include "messages.h" | |
34 #include "spaces.h" | |
35 | |
36 namespace v8 { | |
37 namespace internal { | |
38 | |
39 | |
// Parses the JSON text in source and returns the resulting value. | |
// The source is flattened, the sequential-ASCII fast-path flag is recorded, | |
// the scanner is primed, and exactly one JSON value followed by EOS is | |
// expected. If parsing fails, or anything other than EOS follows the value, | |
// a SyntaxError describing the current token is thrown on the isolate and a | |
// null handle is returned. | |
40 Handle<Object> JsonParser::ParseJson(Handle<String> source) { | |
41 source_ = Handle<String>(source->TryFlattenGetString()); | |
// NOTE(review): stored as length - 1, presumably the index of the last | |
// character as used by Advance() (not visible here) - TODO confirm. | |
42 source_length_ = source_->length() - 1; | |
43 | |
44 // Optimized fast case where we only have ascii characters. | |
45 if (source_->IsSeqAsciiString()) { | |
46 is_sequential_ascii_ = true; | |
Lasse Reichstein
2011/05/19 07:27:40
This is not a property that is guaranteed to be pr
Rico
2011/05/23 18:18:12
As discussed offline, this is not the case right?
Lasse Reichstein
2011/05/24 07:28:34
That what is not the case? That strings can change
Rico
2011/05/24 08:49:24
OK, changed so that we reinitialize this variable
| |
47 seq_source_ = Handle<SeqAsciiString>::cast(source_); | |
48 } else { | |
49 is_sequential_ascii_ = false; | |
50 } | |
51 | |
52 // Set initial position right before the string. | |
53 position_ = -1; | |
54 // Advance to the first character (possibly EOS) | |
55 Advance(); | |
// Prime the lookahead: this Next() scans the first token into next_; the | |
// Next() at the top of ParseJsonValue() then makes it the current token. | |
56 Next(); | |
57 Handle<Object> result = ParseJsonValue(); | |
58 if (result.is_null() || Next() != Token::EOS) { | |
59 // Parse failed. Scanner's current token is the unexpected token. | |
60 Token::Value token = current_.token; | |
61 | |
// message is the error message key; name_opt is only set for tokens that | |
// fall through to the default case, where the token text is reported. | |
62 const char* message; | |
63 const char* name_opt = NULL; | |
64 | |
65 switch (token) { | |
66 case Token::EOS: | |
67 message = "unexpected_eos"; | |
68 break; | |
69 case Token::NUMBER: | |
70 message = "unexpected_token_number"; | |
71 break; | |
72 case Token::STRING: | |
73 message = "unexpected_token_string"; | |
74 break; | |
75 case Token::IDENTIFIER: | |
76 case Token::FUTURE_RESERVED_WORD: | |
77 message = "unexpected_token_identifier"; | |
78 break; | |
79 default: | |
80 message = "unexpected_token"; | |
81 name_opt = Token::String(token); | |
82 ASSERT(name_opt != NULL); | |
83 break; | |
84 } | |
85 | |
// The error location spans the current token within a script created from | |
// the original (unflattened) source handle. | |
86 Factory* factory = isolate()->factory(); | |
87 MessageLocation location(factory->NewScript(source), | |
88 current_.beg_pos, | |
89 current_.end_pos); | |
// Error arguments: empty unless a token name is available, in which case | |
// the array holds that single name. | |
90 Handle<JSArray> array; | |
91 if (name_opt == NULL) { | |
92 array = factory->NewJSArray(0); | |
93 } else { | |
94 Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt)); | |
95 Handle<FixedArray> element = factory->NewFixedArray(1); | |
96 element->set(0, *name); | |
97 array = factory->NewJSArrayWithElements(element); | |
98 } | |
// This inner result shadows the outer parse result; it is the error object. | |
99 Handle<Object> result = factory->NewSyntaxError(message, array); | |
100 isolate()->Throw(*result, &location); | |
101 return Handle<Object>::null(); | |
102 } | |
103 return result; | |
104 } | |
105 | |
106 | |
107 // Parse any JSON value. | |
// Consumes one token with Next() and dispatches on it. STRING, NUMBER and | |
// the true/false/null literal tokens are turned into values directly (the | |
// number is read from number_, the string from GetString). LBRACE and | |
// LBRACK delegate to ParseJsonObject() and ParseJsonArray(). Any other | |
// token yields whatever ReportUnexpectedToken() returns. | |
108 Handle<Object> JsonParser::ParseJsonValue() { | |
109 Token::Value token = Next(); | |
110 switch (token) { | |
111 case Token::STRING: | |
112 return GetString(false); | |
113 case Token::NUMBER: | |
114 return isolate()->factory()->NewNumber(number_); | |
115 case Token::FALSE_LITERAL: | |
116 return isolate()->factory()->false_value(); | |
117 case Token::TRUE_LITERAL: | |
118 return isolate()->factory()->true_value(); | |
119 case Token::NULL_LITERAL: | |
120 return isolate()->factory()->null_value(); | |
121 case Token::LBRACE: | |
122 return ParseJsonObject(); | |
123 case Token::LBRACK: | |
124 return ParseJsonArray(); | |
125 default: | |
126 return ReportUnexpectedToken(); | |
127 } | |
128 } | |
129 | |
130 | |
131 // Parse a JSON object. Scanner must be right after '{' token. | |
// Allocates a new JSObject from the global context object function and then | |
// reads zero or more key/value members separated by COMMA tokens until an | |
// RBRACE token. Each key must be a STRING token followed by a COLON token. | |
// Returns the object on success, a null handle if a member value fails to | |
// parse, and the result of ReportUnexpectedToken() on a malformed member. | |
132 Handle<Object> JsonParser::ParseJsonObject() { | |
133 Handle<JSFunction> object_constructor( | |
134 isolate()->global_context()->object_function()); | |
135 Handle<JSObject> json_object = | |
136 isolate()->factory()->NewJSObject(object_constructor); | |
137 | |
// Empty object: just consume the closing brace. | |
138 if (Peek() == Token::RBRACE) { | |
139 Next(); | |
140 } else { | |
141 do { | |
142 if (Next() != Token::STRING) { | |
143 return ReportUnexpectedToken(); | |
144 } | |
// The key is requested with the symbol flag set (see GetString). | |
145 Handle<String> key = GetString(true); | |
Lasse Reichstein
2011/05/19 07:27:40
How about having two functions: GetString() and Ge
Rico
2011/05/23 18:18:12
Done.
| |
146 if (Next() != Token::COLON) { | |
147 return ReportUnexpectedToken(); | |
148 } | |
149 | |
150 Handle<Object> value = ParseJsonValue(); | |
151 if (value.is_null()) return Handle<Object>::null(); | |
152 | |
// Three ways to store the member: array-index keys go in as elements, a | |
// key equal to Proto_symbol sets the prototype, and every other key | |
// becomes a local property with attributes NONE. | |
153 uint32_t index; | |
154 if (key->AsArrayIndex(&index)) { | |
155 SetOwnElement(json_object, index, value, kNonStrictMode); | |
156 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { | |
157 SetPrototype(json_object, value); | |
158 } else { | |
159 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); | |
160 } | |
161 } while (Next() == Token::COMMA); | |
// The token that ended the member loop must be the closing brace. | |
162 if (current_.token != Token::RBRACE) { | |
163 return ReportUnexpectedToken(); | |
164 } | |
165 } | |
166 return json_object; | |
167 } | |
168 | |
169 // Parse a JSON array. Scanner must be right after '[' token. | |
// Parses zero or more values separated by COMMA tokens until an RBRACK | |
// token, collecting them in a temporary zone list, then copies them into a | |
// FixedArray that backs the returned JSArray. Returns a null handle if an | |
// element fails to parse and the result of ReportUnexpectedToken() if the | |
// list is not terminated by RBRACK. | |
170 Handle<Object> JsonParser::ParseJsonArray() { | |
171 ZoneScope zone_scope(DELETE_ON_EXIT); | |
172 ZoneList<Handle<Object> > elements(4); | |
173 | |
174 Token::Value token = Peek(); | |
// Empty array: just consume the closing bracket. | |
175 if (token == Token::RBRACK) { | |
176 Next(); | |
177 } else { | |
178 do { | |
179 Handle<Object> element = ParseJsonValue(); | |
180 if (element.is_null()) return Handle<Object>::null(); | |
181 elements.Add(element); | |
182 token = Next(); | |
183 } while (token == Token::COMMA); | |
184 if (token != Token::RBRACK) { | |
185 return ReportUnexpectedToken(); | |
186 } | |
187 } | |
188 | |
189 // Allocate a fixed array with all the elements. | |
190 Handle<FixedArray> fast_elements = | |
191 isolate()->factory()->NewFixedArray(elements.length()); | |
192 | |
193 for (int i = 0, n = elements.length(); i < n; i++) { | |
194 fast_elements->set(i, *elements[i]); | |
195 } | |
196 | |
197 return isolate()->factory()->NewJSArrayWithElements(fast_elements); | |
198 } | |
199 | |
200 | |
// Advances the token stream by one: the previously scanned lookahead | |
// (next_) becomes the current token, ScanJson() scans a fresh lookahead, | |
// and the token that is now current is returned. | |
201 Token::Value JsonParser::Next() { | |
202 current_ = next_; | |
203 ScanJson(); | |
204 return current_.token; | |
205 } | |
206 | |
// Scans the next token starting at the current character c0_ and stores its | |
// kind and source range in next_. Tab, carriage return, line feed and space | |
// are reported internally as WHITESPACE and skipped by looping. Punctuation | |
// is handled inline; strings, numbers and the true/false/null literals are | |
// delegated to the dedicated scan functions. | |
207 void JsonParser::ScanJson() { | |
208 Token::Value token; | |
209 do { | |
210 // Remember the position of the next token | |
211 next_.beg_pos = position_; | |
212 switch (c0_) { | |
213 case '\t': | |
214 case '\r': | |
215 case '\n': | |
216 case ' ': | |
217 Advance(); | |
218 token = Token::WHITESPACE; | |
219 break; | |
220 case '{': | |
221 Advance(); | |
222 token = Token::LBRACE; | |
223 break; | |
224 case '}': | |
225 Advance(); | |
226 token = Token::RBRACE; | |
227 break; | |
228 case '[': | |
229 Advance(); | |
230 token = Token::LBRACK; | |
231 break; | |
232 case ']': | |
233 Advance(); | |
234 token = Token::RBRACK; | |
235 break; | |
236 case ':': | |
237 Advance(); | |
238 token = Token::COLON; | |
239 break; | |
240 case ',': | |
241 Advance(); | |
242 token = Token::COMMA; | |
243 break; | |
244 case '"': | |
245 token = ScanJsonString(); | |
246 break; | |
247 case '-': | |
248 case '0': | |
249 case '1': | |
250 case '2': | |
251 case '3': | |
252 case '4': | |
253 case '5': | |
254 case '6': | |
255 case '7': | |
256 case '8': | |
257 case '9': | |
258 token = ScanJsonNumber(); | |
259 break; | |
// Only the first letter selects the literal; ScanJsonIdentifier checks | |
// the remaining characters and returns ILLEGAL on a mismatch. | |
260 case 't': | |
261 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); | |
262 break; | |
263 case 'f': | |
264 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); | |
265 break; | |
266 case 'n': | |
267 token = ScanJsonIdentifier("null", Token::NULL_LITERAL); | |
268 break; | |
269 default: | |
// A negative c0_ is treated as end of input; any other character that | |
// reaches this point is an illegal token. | |
270 if (c0_ < 0) { | |
271 Advance(); | |
272 token = Token::EOS; | |
273 } else { | |
274 Advance(); | |
275 token = Token::ILLEGAL; | |
276 } | |
277 } | |
278 } while (token == Token::WHITESPACE); | |
279 | |
280 next_.end_pos = position_; | |
281 next_.token = token; | |
282 } | |
283 | |
284 | |
// Matches the input, starting at the current character c0_, against the | |
// NUL-terminated literal text, advancing one character per match. | |
// Returns token if every character of text matched, and Token::ILLEGAL at | |
// the first mismatch (characters matched so far remain consumed). | |
285 Token::Value JsonParser::ScanJsonIdentifier(const char* text, | |
286 Token::Value token) { | |
287 while (*text != '\0') { | |
288 if (c0_ != *text) return Token::ILLEGAL; | |
289 Advance(); | |
290 text++; | |
291 } | |
292 return token; | |
293 } | |
294 | |
295 | |
// Scans a JSON number beginning at the current character c0_ and stores its | |
// value in number_. Accepts an optional minus sign, an integer part with no | |
// superfluous leading zero, an optional fraction and an optional exponent. | |
// Integers with fewer than 10 digits and no fraction or exponent are | |
// accumulated directly; everything else is converted with StringToDouble | |
// over the scanned source range. Returns Token::NUMBER on success and | |
// Token::ILLEGAL when the text is not a well-formed number. | |
296 Token::Value JsonParser::ScanJsonNumber() { | |
297 bool negative = false; | |
298 | |
299 if (c0_ == '-') { | |
300 Advance(); | |
301 negative = true; | |
302 } | |
303 if (c0_ == '0') { | |
304 Advance(); | |
305 // Prefix zero is only allowed if it's the only digit before | |
306 // a decimal point or exponent. | |
307 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | |
308 } else { | |
309 int i = 0; | |
310 int digits = 0; | |
311 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | |
312 do { | |
313 i = i * 10 + c0_ - '0'; | |
314 digits++; | |
315 Advance(); | |
316 } while (c0_ >= '0' && c0_ <= '9'); | |
// Fast path: a plain integer of at most 9 digits. The digit limit | |
// presumably keeps the accumulated value i in int range - TODO confirm. | |
317 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { | |
318 number_ = (negative ? -i : i); | |
319 return Token::NUMBER; | |
320 } | |
321 } | |
// Optional fraction: a dot must be followed by at least one digit. | |
322 if (c0_ == '.') { | |
323 Advance(); | |
324 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
325 do { | |
326 Advance(); | |
327 } while (c0_ >= '0' && c0_ <= '9'); | |
328 } | |
// Optional exponent: e or E, an optional sign, then at least one digit. | |
329 if (AsciiAlphaToLower(c0_) == 'e') { | |
330 Advance(); | |
331 if (c0_ == '-' || c0_ == '+') Advance(); | |
332 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
333 do { | |
334 Advance(); | |
335 } while (c0_ >= '0' && c0_ <= '9'); | |
336 } | |
// Slow path: convert the validated text in [next_.beg_pos, position_). | |
// For sequential ASCII sources the characters are read in place. | |
337 if (is_sequential_ascii_) { | |
338 Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos, | |
339 position_ - next_.beg_pos); | |
340 number_ = StringToDouble(isolate()->unicode_cache(), | |
341 chars, | |
342 NO_FLAGS, // Hex, octal or trailing junk. | |
343 OS::nan_value()); | |
344 } else { | |
Lasse Reichstein
2011/05/19 07:27:40
Ick. Why create a heap string?
Just make a buffer
Rico
2011/05/23 18:18:12
Fast atoi? this is a double.
I added a conversion
| |
345 Handle<String> value = isolate()->factory()->NewSubString( | |
346 source_, next_.beg_pos, position_); | |
347 number_ = StringToDouble(isolate()->unicode_cache(), | |
348 *value, | |
349 NO_FLAGS, // Hex, octal or trailing junk. | |
350 OS::nan_value()); | |
351 } | |
352 return Token::NUMBER; | |
353 } | |
354 | |
// Slow path of string scanning, entered from ScanJsonString() when a | |
// backslash or a character not below kMaxAsciiCharCode is encountered. | |
// The characters already scanned are taken as a substring of the source; | |
// the remainder is decoded (including escape sequences) into a two-byte | |
// buffer. On success string_val_ is set to the cons of both parts and | |
// Token::STRING is returned. Token::ILLEGAL is returned for a control | |
// character, an unterminated string or an invalid escape sequence. | |
355 Token::Value JsonParser::SlowScanJsonString() { | |
356 // The currently scanned ascii characters. | |
357 Handle<String> ascii(isolate()->factory()->NewSubString(source_, | |
358 next_.beg_pos + 1, | |
359 position_)); | |
360 Handle<String> two_byte = | |
361 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, | |
362 NOT_TENURED); | |
363 ASSERT(two_byte->IsSeqTwoByteString()); | |
Lasse Reichstein
2011/05/19 07:27:40
Odd assert. How could that not happen?
Consider ch
Rico
2011/05/23 18:18:12
Done.
| |
364 Handle<SeqTwoByteString> seq_two_byte = | |
365 Handle<SeqTwoByteString>::cast(two_byte); | |
366 | |
// allocation_count tracks the buffer capacity in multiples of | |
// kInitialSpecialStringSize; count is the number of characters written. | |
367 int allocation_count = 1; | |
368 int count = 0; | |
369 | |
370 while (c0_ != '"') { | |
371 // Create new seq string | |
// The buffer is full: allocate one that is kInitialSpecialStringSize | |
// characters larger and copy the characters written so far into it. | |
372 if (count >= kInitialSpecialStringSize * allocation_count) { | |
373 allocation_count++; | |
374 int new_size = allocation_count * kInitialSpecialStringSize; | |
375 Handle<String> new_two_byte = | |
376 isolate()->factory()->NewRawTwoByteString(new_size, | |
377 NOT_TENURED); | |
378 uc16* char_start = | |
379 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); | |
380 String::WriteToFlat(*seq_two_byte, char_start, 0, count); | |
381 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); | |
382 } | |
383 | |
384 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
385 if (c0_ < 0x20) return Token::ILLEGAL; | |
386 if (c0_ != '\\') { | |
387 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | |
388 Advance(); | |
389 } else { | |
// Escape sequence: step over the backslash and decode what follows. | |
390 Advance(); | |
391 switch (c0_) { | |
392 case '"': | |
393 case '\\': | |
394 case '/': | |
395 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | |
396 break; | |
397 case 'b': | |
398 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); | |
399 break; | |
400 case 'f': | |
401 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); | |
402 break; | |
403 case 'n': | |
404 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); | |
405 break; | |
406 case 'r': | |
407 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); | |
408 break; | |
409 case 't': | |
410 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); | |
411 break; | |
// Unicode escape: exactly four hex digits form one code unit. | |
412 case 'u': { | |
413 uc32 value = 0; | |
414 for (int i = 0; i < 4; i++) { | |
415 Advance(); | |
416 int digit = HexValue(c0_); | |
417 if (digit < 0) { | |
418 return Token::ILLEGAL; | |
419 } | |
420 value = value * 16 + digit; | |
421 } | |
422 seq_two_byte->SeqTwoByteStringSet(count++, value); | |
423 break; | |
424 } | |
425 default: | |
426 return Token::ILLEGAL; | |
427 } | |
// Step over the last character of the escape sequence. | |
428 Advance(); | |
429 } | |
430 } | |
431 // Advance past the last '"'. | |
432 ASSERT_EQ('"', c0_); | |
433 Advance(); | |
434 | |
435 // Shrink the string to our length. | |
436 isolate()->heap()-> | |
437 new_space()-> | |
438 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte, | |
439 count); | |
Lasse Reichstein
2011/05/19 07:27:40
If the ascii string is short (e.g., it's shorter t
Rico
2011/05/23 18:18:12
Indeed, this is one of those optimizations for the
| |
440 string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte); | |
441 return Token::STRING; | |
442 } | |
443 | |
444 | |
// Scans a string token; the current character c0_ must be the opening | |
// quote. The fast path only advances over characters that need no decoding | |
// and leaves string_val_ null, so GetString() later takes the text straight | |
// from the source. A backslash or a character not below kMaxAsciiCharCode | |
// hands over to SlowScanJsonString(). Returns Token::STRING on success and | |
// Token::ILLEGAL for a control character or an unterminated string. | |
445 Token::Value JsonParser::ScanJsonString() { | |
446 ASSERT_EQ('"', c0_); | |
447 // Set string_val to null. If string_val is not set we assume an | |
448 // ascii string beginning at next_.beg_pos + 1 to next_.end_pos - 1. | |
449 string_val_ = Handle<String>::null(); | |
450 Advance(); | |
451 // Fast case for ascii only without escape characters. | |
452 while (c0_ != '"') { | |
453 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
454 if (c0_ < 0x20) return Token::ILLEGAL; | |
455 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) { | |
456 Advance(); | |
457 } else { | |
458 return SlowScanJsonString(); | |
Lasse Reichstein
2011/05/19 07:27:40
The SlowScanJsonString creates a TwoByte string. I
Rico
2011/05/23 18:18:12
Yes
| |
459 } | |
460 } | |
461 ASSERT_EQ('"', c0_); | |
462 // Advance past the last '"'. | |
463 Advance(); | |
464 return Token::STRING; | |
465 } | |
466 | |
467 | |
// Returns the value of the current string token. | |
// If the slow scanner produced a decoded string (string_val_ is set), that | |
// string is returned as is, regardless of is_symbol. Otherwise the text | |
// between the quotes is taken from the source: via LookupAsciiSymbol when | |
// the source is sequential ASCII and is_symbol is true, and via | |
// NewSubString in every other case. | |
// NOTE(review): the local seq in the symbol branch is not used; the lookup | |
// is performed on seq_source_. | |
468 Handle<String> JsonParser::GetString(bool is_symbol) { | |
469 // We have a non ascii string, return that. | |
470 if (!string_val_.is_null()) return string_val_; | |
Lasse Reichstein
2011/05/19 07:27:40
That does not make it a symbol if it isn't already
Rico
2011/05/23 18:18:12
renaming hint_symbol
| |
471 | |
472 if (is_sequential_ascii_ && is_symbol) { | |
473 Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_); | |
474 // The current token includes the '"' in both ends. | |
475 int length = current_.end_pos - current_.beg_pos - 2; | |
476 return isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
477 current_.beg_pos + 1, | |
478 length); | |
479 } | |
480 // The current token includes the '"' in both ends. | |
481 return isolate()->factory()->NewSubString( | |
Lasse Reichstein
2011/05/19 07:27:40
This also doesn't make it a symbol even if is_symb
Rico
2011/05/23 18:18:12
Done.
| |
482 source_, current_.beg_pos + 1, current_.end_pos - 1); | |
483 } | |
484 | |
485 } } // namespace v8::internal | |
OLD | NEW |