OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2011 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 #include "v8.h" | |
29 | |
30 #include "char-predicates-inl.h" | |
31 #include "conversions.h" | |
32 #include "json-parser.h" | |
33 #include "messages.h" | |
34 #include "spaces.h" | |
35 | |
36 namespace v8 { | |
37 namespace internal { | |
38 | |
39 | |
40 Handle<Object> JsonParser::ParseJson(Handle<String> source) { | |
41 isolate_ = source->map()->isolate(); | |
42 source_ = Handle<String>(source->TryFlattenGetString()); | |
43 source_length_ = source_->length() - 1; | |
44 | |
45 // Optimized fast case where we only have ascii characters. | |
46 if (source_->IsSeqAsciiString()) { | |
47 is_sequential_ascii_ = true; | |
48 seq_source_ = Handle<SeqAsciiString>::cast(source_); | |
49 } else { | |
50 is_sequential_ascii_ = false; | |
51 } | |
52 | |
53 // Set initial position right before the string. | |
54 position_ = -1; | |
55 // Advance to the first character (posibly EOS) | |
56 Advance(); | |
57 Next(); | |
58 Handle<Object> result = ParseJsonValue(); | |
59 if (result.is_null() || Next() != Token::EOS) { | |
60 // Parse failed. Scanner's current token is the unexpected token. | |
61 Token::Value token = current_.token; | |
62 | |
63 const char* message; | |
64 const char* name_opt = NULL; | |
65 | |
66 switch (token) { | |
67 case Token::EOS: | |
68 message = "unexpected_eos"; | |
69 break; | |
70 case Token::NUMBER: | |
71 message = "unexpected_token_number"; | |
72 break; | |
73 case Token::STRING: | |
74 message = "unexpected_token_string"; | |
75 break; | |
76 case Token::IDENTIFIER: | |
77 case Token::FUTURE_RESERVED_WORD: | |
78 message = "unexpected_token_identifier"; | |
79 break; | |
80 default: | |
81 message = "unexpected_token"; | |
82 name_opt = Token::String(token); | |
83 ASSERT(name_opt != NULL); | |
84 break; | |
85 } | |
86 | |
87 Factory* factory = isolate()->factory(); | |
88 MessageLocation location(factory->NewScript(source), | |
89 current_.beg_pos, | |
90 current_.end_pos); | |
91 Handle<JSArray> array; | |
92 if (name_opt == NULL) { | |
93 array = factory->NewJSArray(0); | |
94 } else { | |
95 Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt)); | |
96 Handle<FixedArray> element = factory->NewFixedArray(1); | |
97 element->set(0, *name); | |
98 array = factory->NewJSArrayWithElements(element); | |
99 } | |
100 Handle<Object> result = factory->NewSyntaxError(message, array); | |
101 isolate()->Throw(*result, &location); | |
102 return Handle<Object>::null(); | |
103 } | |
104 return result; | |
105 } | |
106 | |
107 | |
108 // Parse any JSON value. | |
109 Handle<Object> JsonParser::ParseJsonValue() { | |
110 Token::Value token = Next(); | |
111 switch (token) { | |
112 case Token::STRING: | |
113 return GetString(false); | |
114 case Token::NUMBER: | |
115 return isolate()->factory()->NewNumber(number_); | |
116 case Token::FALSE_LITERAL: | |
117 return isolate()->factory()->false_value(); | |
118 case Token::TRUE_LITERAL: | |
119 return isolate()->factory()->true_value(); | |
120 case Token::NULL_LITERAL: | |
121 return isolate()->factory()->null_value(); | |
122 case Token::LBRACE: | |
123 return ParseJsonObject(); | |
124 case Token::LBRACK: | |
125 return ParseJsonArray(); | |
126 default: | |
127 return ReportUnexpectedToken(); | |
128 } | |
129 } | |
130 | |
131 | |
132 // Parse a JSON object. Scanner must be right after '{' token. | |
133 Handle<Object> JsonParser::ParseJsonObject() { | |
134 Handle<JSFunction> object_constructor( | |
135 isolate()->global_context()->object_function()); | |
136 Handle<JSObject> json_object = | |
137 isolate()->factory()->NewJSObject(object_constructor); | |
138 | |
139 if (Peek() == Token::RBRACE) { | |
140 Next(); | |
141 } else { | |
142 do { | |
143 if (Next() != Token::STRING) { | |
144 return ReportUnexpectedToken(); | |
145 } | |
146 Handle<String> key = GetString(true); | |
147 if (Next() != Token::COLON) { | |
148 return ReportUnexpectedToken(); | |
149 } | |
150 | |
151 Handle<Object> value = ParseJsonValue(); | |
152 if (value.is_null()) return Handle<Object>::null(); | |
153 | |
154 uint32_t index; | |
155 if (key->AsArrayIndex(&index)) { | |
156 SetOwnElement(json_object, index, value, kNonStrictMode); | |
157 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { | |
158 SetPrototype(json_object, value); | |
159 } else { | |
160 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); | |
161 } | |
162 } while (Next() == Token::COMMA); | |
163 if (current_.token != Token::RBRACE) { | |
164 return ReportUnexpectedToken(); | |
165 } | |
166 } | |
167 return json_object; | |
168 } | |
169 | |
170 // Parse a JSON array. Scanner must be right after '[' token. | |
171 Handle<Object> JsonParser::ParseJsonArray() { | |
172 ZoneScope zone_scope(DELETE_ON_EXIT); | |
173 ZoneList<Handle<Object> > elements(4); | |
174 | |
175 Token::Value token = Peek(); | |
176 if (token == Token::RBRACK) { | |
177 Next(); | |
178 } else { | |
179 do { | |
180 Handle<Object> element = ParseJsonValue(); | |
181 if (element.is_null()) return Handle<Object>::null(); | |
182 elements.Add(element); | |
183 token = Next(); | |
184 } while (token == Token::COMMA); | |
185 if (token != Token::RBRACK) { | |
186 return ReportUnexpectedToken(); | |
187 } | |
188 } | |
189 | |
190 // Allocate a fixed array with all the elements. | |
191 Handle<FixedArray> fast_elements = | |
192 isolate()->factory()->NewFixedArray(elements.length()); | |
193 | |
194 for (int i = 0, n = elements.length(); i < n; i++) { | |
195 fast_elements->set(i, *elements[i]); | |
196 } | |
197 | |
198 return isolate()->factory()->NewJSArrayWithElements(fast_elements); | |
199 } | |
200 | |
201 | |
202 Token::Value JsonParser::Next() { | |
203 current_ = next_; | |
204 ScanJson(); | |
205 return current_.token; | |
206 } | |
207 | |
208 void JsonParser::ScanJson() { | |
209 Token::Value token; | |
210 do { | |
211 // Remember the position of the next token | |
212 next_.beg_pos = position_; | |
213 switch (c0_) { | |
214 case '\t': | |
215 case '\r': | |
216 case '\n': | |
217 case ' ': | |
218 Advance(); | |
219 token = Token::WHITESPACE; | |
220 break; | |
221 case '{': | |
222 Advance(); | |
223 token = Token::LBRACE; | |
224 break; | |
225 case '}': | |
226 Advance(); | |
227 token = Token::RBRACE; | |
228 break; | |
229 case '[': | |
230 Advance(); | |
231 token = Token::LBRACK; | |
232 break; | |
233 case ']': | |
234 Advance(); | |
235 token = Token::RBRACK; | |
236 break; | |
237 case ':': | |
238 Advance(); | |
239 token = Token::COLON; | |
240 break; | |
241 case ',': | |
242 Advance(); | |
243 token = Token::COMMA; | |
244 break; | |
245 case '"': | |
246 token = ScanJsonString(); | |
247 break; | |
248 case '-': | |
249 case '0': | |
250 case '1': | |
251 case '2': | |
252 case '3': | |
253 case '4': | |
254 case '5': | |
255 case '6': | |
256 case '7': | |
257 case '8': | |
258 case '9': | |
259 token = ScanJsonNumber(); | |
260 break; | |
261 case 't': | |
262 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); | |
263 break; | |
264 case 'f': | |
265 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); | |
266 break; | |
267 case 'n': | |
268 token = ScanJsonIdentifier("null", Token::NULL_LITERAL); | |
269 break; | |
270 default: | |
271 if (c0_ < 0) { | |
272 Advance(); | |
273 token = Token::EOS; | |
274 } else { | |
275 Advance(); | |
276 token = Token::ILLEGAL; | |
277 } | |
278 } | |
279 } while (token == Token::WHITESPACE); | |
280 | |
281 next_.end_pos = position_; | |
282 next_.token = token; | |
283 } | |
284 | |
285 | |
286 Token::Value JsonParser::ScanJsonIdentifier(const char* text, | |
287 Token::Value token) { | |
288 while (*text != '\0') { | |
289 if (c0_ != *text) return Token::ILLEGAL; | |
290 Advance(); | |
291 text++; | |
292 } | |
293 return token; | |
294 } | |
295 | |
296 | |
297 Token::Value JsonParser::ScanJsonNumber() { | |
298 bool negative = false; | |
299 | |
300 if (c0_ == '-') { | |
301 Advance(); | |
302 negative = true; | |
303 } | |
304 if (c0_ == '0') { | |
305 Advance(); | |
306 // Prefix zero is only allowed if it's the only digit before | |
307 // a decimal point or exponent. | |
308 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; | |
309 } else { | |
310 int i = 0; | |
311 int digits = 0; | |
312 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; | |
313 do { | |
314 i = i * 10 + c0_ - '0'; | |
315 digits++; | |
316 Advance(); | |
317 } while (c0_ >= '0' && c0_ <= '9'); | |
318 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { | |
319 number_ = (negative ? -i : i); | |
320 return Token::NUMBER; | |
321 } | |
322 } | |
323 if (c0_ == '.') { | |
324 Advance(); | |
325 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
326 do { | |
327 Advance(); | |
328 } while (c0_ >= '0' && c0_ <= '9'); | |
329 } | |
330 if (AsciiAlphaToLower(c0_) == 'e') { | |
331 Advance(); | |
332 if (c0_ == '-' || c0_ == '+') Advance(); | |
333 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; | |
334 do { | |
335 Advance(); | |
336 } while (c0_ >= '0' && c0_ <= '9'); | |
337 } | |
338 if (is_sequential_ascii_) { | |
339 Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos, | |
340 position_ - next_.beg_pos); | |
341 number_ = StringToDouble(isolate()->unicode_cache(), | |
342 chars, | |
343 NO_FLAGS, // Hex, octal or trailing junk. | |
344 OS::nan_value()); | |
345 } else { | |
346 Vector<uc16> buffer = Vector<uc16>::New(position_ - next_.beg_pos); | |
347 String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_); | |
Lasse Reichstein
2011/05/24 07:28:34
You only need an ASCII buffer (we have already est
Rico
2011/05/24 08:49:24
Done.
| |
348 Vector<const uc16> res = | |
Lasse Reichstein
2011/05/24 07:28:34
res -> result.
Rico
2011/05/24 08:49:24
Done.
| |
349 Vector<const uc16>(reinterpret_cast<const uc16*>(buffer.start()), | |
350 position_ - next_.beg_pos); | |
351 number_ = StringToDouble(isolate()->unicode_cache(), | |
352 res, | |
353 NO_FLAGS, // Hex, octal or trailing junk. | |
354 OS::nan_value()); | |
Lasse Reichstein
2011/05/24 07:28:34
Just drop the last argument (i.e., let it default
Rico
2011/05/24 08:49:24
Done.
| |
355 buffer.Dispose(); | |
356 } | |
357 return Token::NUMBER; | |
358 } | |
359 | |
360 Token::Value JsonParser::SlowScanJsonString() { | |
361 // The currently scanned ascii characters. | |
362 Handle<String> ascii(isolate()->factory()->NewSubString(source_, | |
363 next_.beg_pos + 1, | |
364 position_)); | |
365 Handle<String> two_byte = | |
366 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, | |
367 NOT_TENURED); | |
368 Handle<SeqTwoByteString> seq_two_byte = | |
369 Handle<SeqTwoByteString>::cast(two_byte); | |
370 | |
371 int allocation_count = 1; | |
372 int count = 0; | |
373 | |
374 while (c0_ != '"') { | |
375 // Create new seq string | |
376 if (count >= kInitialSpecialStringSize * allocation_count) { | |
377 allocation_count++; | |
378 int new_size = allocation_count * kInitialSpecialStringSize; | |
379 Handle<String> new_two_byte = | |
380 isolate()->factory()->NewRawTwoByteString(new_size, | |
381 NOT_TENURED); | |
382 uc16* char_start = | |
383 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); | |
384 String::WriteToFlat(*seq_two_byte, char_start, 0, count); | |
385 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); | |
386 } | |
387 | |
388 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
389 if (c0_ < 0x20) return Token::ILLEGAL; | |
390 if (c0_ != '\\') { | |
391 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | |
392 Advance(); | |
393 } else { | |
394 Advance(); | |
395 switch (c0_) { | |
396 case '"': | |
397 case '\\': | |
398 case '/': | |
399 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | |
400 break; | |
401 case 'b': | |
402 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); | |
403 break; | |
404 case 'f': | |
405 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); | |
406 break; | |
407 case 'n': | |
408 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); | |
409 break; | |
410 case 'r': | |
411 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); | |
412 break; | |
413 case 't': | |
414 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); | |
415 break; | |
416 case 'u': { | |
417 uc32 value = 0; | |
418 for (int i = 0; i < 4; i++) { | |
419 Advance(); | |
420 int digit = HexValue(c0_); | |
421 if (digit < 0) { | |
422 return Token::ILLEGAL; | |
423 } | |
424 value = value * 16 + digit; | |
425 } | |
426 seq_two_byte->SeqTwoByteStringSet(count++, value); | |
427 break; | |
428 } | |
429 default: | |
430 return Token::ILLEGAL; | |
431 } | |
432 Advance(); | |
433 } | |
434 } | |
435 // Advance past the last '"'. | |
436 ASSERT_EQ('"', c0_); | |
437 Advance(); | |
438 | |
439 // Shrink the the string to our length. | |
440 isolate()->heap()-> | |
441 new_space()-> | |
442 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte, | |
443 count); | |
444 string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte); | |
445 return Token::STRING; | |
446 } | |
447 | |
448 | |
449 Token::Value JsonParser::ScanJsonString() { | |
450 ASSERT_EQ('"', c0_); | |
451 // Set string_val to null. If string_val is not set we assume an | |
452 // ascii string begining at next_.beg_pos + 1 to next_.end_pos - 1. | |
453 string_val_ = Handle<String>::null(); | |
454 Advance(); | |
455 // Fast case for ascii only without escape characters. | |
456 while (c0_ != '"') { | |
457 // Check for control character (0x00-0x1f) or unterminated string (<0). | |
458 if (c0_ < 0x20) return Token::ILLEGAL; | |
459 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) { | |
460 Advance(); | |
461 } else { | |
462 return SlowScanJsonString(); | |
463 } | |
464 } | |
465 ASSERT_EQ('"', c0_); | |
466 // Advance past the last '"'. | |
467 Advance(); | |
468 return Token::STRING; | |
469 } | |
470 | |
471 Handle<String> JsonParser::GetString() { | |
472 return GetString(false); | |
473 } | |
474 | |
475 Handle<String> JsonParser::GetSymbol() { | |
476 Handle<String> result = GetString(true); | |
477 if (result->IsSymbol()) return result; | |
478 return isolate()->factory()->LookupSymbol(result); | |
Lasse Reichstein
2011/05/24 07:28:34
I guess that's ok ... for now.
| |
479 } | |
480 | |
481 Handle<String> JsonParser::GetString(bool hint_symbol) { | |
482 // We have a non ascii string, return that. | |
483 if (!string_val_.is_null()) return string_val_; | |
484 | |
485 if (is_sequential_ascii_ && hint_symbol) { | |
486 Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_); | |
487 // The current token includes the '"' in both ends. | |
488 int length = current_.end_pos - current_.beg_pos - 2; | |
489 return isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
490 current_.beg_pos + 1, | |
491 length); | |
492 } | |
493 // The current token includes the '"' in both ends. | |
494 return isolate()->factory()->NewSubString( | |
495 source_, current_.beg_pos + 1, current_.end_pos - 1); | |
496 } | |
497 | |
498 } } // namespace v8::internal | |
OLD | NEW |