Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: src/json-parser.cc

Issue 7134010: Specialize JSON parser to only check for SequentialAsciiString once. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/json-parser.h ('k') | src/runtime.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include "v8.h"
29
30 #include "char-predicates-inl.h"
31 #include "conversions.h"
32 #include "json-parser.h"
33 #include "messages.h"
34 #include "spaces.h"
35
36 namespace v8 {
37 namespace internal {
38
39
40 Handle<Object> JsonParser::ParseJson(Handle<String> source) {
41 isolate_ = source->map()->isolate();
42 source_ = Handle<String>(source->TryFlattenGetString());
43 source_length_ = source_->length() - 1;
44
45 // Optimized fast case where we only have ascii characters.
46 if (source_->IsSeqAsciiString()) {
47 is_sequential_ascii_ = true;
48 seq_source_ = Handle<SeqAsciiString>::cast(source_);
49 } else {
50 is_sequential_ascii_ = false;
51 }
52
53 // Set initial position right before the string.
54 position_ = -1;
55 // Advance to the first character (posibly EOS)
56 AdvanceSkipWhitespace();
57 Handle<Object> result = ParseJsonValue();
58 if (result.is_null() || c0_ != kEndOfString) {
59 // Parse failed. Current character is the unexpected token.
60
61 const char* message;
62 Factory* factory = isolate()->factory();
63 Handle<JSArray> array;
64
65 switch (c0_) {
66 case kEndOfString:
67 message = "unexpected_eos";
68 array = factory->NewJSArray(0);
69 break;
70 case '-':
71 case '0':
72 case '1':
73 case '2':
74 case '3':
75 case '4':
76 case '5':
77 case '6':
78 case '7':
79 case '8':
80 case '9':
81 message = "unexpected_token_number";
82 array = factory->NewJSArray(0);
83 break;
84 case '"':
85 message = "unexpected_token_string";
86 array = factory->NewJSArray(0);
87 break;
88 default:
89 message = "unexpected_token";
90 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_);
91 Handle<FixedArray> element = factory->NewFixedArray(1);
92 element->set(0, *name);
93 array = factory->NewJSArrayWithElements(element);
94 break;
95 }
96
97 MessageLocation location(factory->NewScript(source),
98 position_,
99 position_ + 1);
100 Handle<Object> result = factory->NewSyntaxError(message, array);
101 isolate()->Throw(*result, &location);
102 return Handle<Object>::null();
103 }
104 return result;
105 }
106
107
108 // Parse any JSON value.
109 Handle<Object> JsonParser::ParseJsonValue() {
110 switch (c0_) {
111 case '"':
112 return ParseJsonString();
113 case '-':
114 case '0':
115 case '1':
116 case '2':
117 case '3':
118 case '4':
119 case '5':
120 case '6':
121 case '7':
122 case '8':
123 case '9':
124 return ParseJsonNumber();
125 case 'f':
126 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
127 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
128 AdvanceSkipWhitespace();
129 return isolate()->factory()->false_value();
130 } else {
131 return ReportUnexpectedCharacter();
132 }
133 case 't':
134 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
135 AdvanceGetChar() == 'e') {
136 AdvanceSkipWhitespace();
137 return isolate()->factory()->true_value();
138 } else {
139 return ReportUnexpectedCharacter();
140 }
141 case 'n':
142 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
143 AdvanceGetChar() == 'l') {
144 AdvanceSkipWhitespace();
145 return isolate()->factory()->null_value();
146 } else {
147 return ReportUnexpectedCharacter();
148 }
149 case '{':
150 return ParseJsonObject();
151 case '[':
152 return ParseJsonArray();
153 default:
154 return ReportUnexpectedCharacter();
155 }
156 }
157
158
159 // Parse a JSON object. Position must be right at '{'.
160 Handle<Object> JsonParser::ParseJsonObject() {
161 Handle<JSFunction> object_constructor(
162 isolate()->global_context()->object_function());
163 Handle<JSObject> json_object =
164 isolate()->factory()->NewJSObject(object_constructor);
165 ASSERT_EQ(c0_, '{');
166
167 AdvanceSkipWhitespace();
168 if (c0_ != '}') {
169 do {
170 if (c0_ != '"') return ReportUnexpectedCharacter();
171 Handle<String> key = ParseJsonSymbol();
172 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
173 AdvanceSkipWhitespace();
174 Handle<Object> value = ParseJsonValue();
175 if (value.is_null()) return ReportUnexpectedCharacter();
176
177 uint32_t index;
178 if (key->AsArrayIndex(&index)) {
179 SetOwnElement(json_object, index, value, kNonStrictMode);
180 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
181 SetPrototype(json_object, value);
182 } else {
183 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);
184 }
185 } while (MatchSkipWhiteSpace(','));
186 if (c0_ != '}') {
187 return ReportUnexpectedCharacter();
188 }
189 }
190 AdvanceSkipWhitespace();
191 return json_object;
192 }
193
194 // Parse a JSON array. Position must be right at '['.
195 Handle<Object> JsonParser::ParseJsonArray() {
196 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
197 ZoneList<Handle<Object> > elements(4);
198 ASSERT_EQ(c0_, '[');
199
200 AdvanceSkipWhitespace();
201 if (c0_ != ']') {
202 do {
203 Handle<Object> element = ParseJsonValue();
204 if (element.is_null()) return ReportUnexpectedCharacter();
205 elements.Add(element);
206 } while (MatchSkipWhiteSpace(','));
207 if (c0_ != ']') {
208 return ReportUnexpectedCharacter();
209 }
210 }
211 AdvanceSkipWhitespace();
212 // Allocate a fixed array with all the elements.
213 Handle<FixedArray> fast_elements =
214 isolate()->factory()->NewFixedArray(elements.length());
215 for (int i = 0, n = elements.length(); i < n; i++) {
216 fast_elements->set(i, *elements[i]);
217 }
218 return isolate()->factory()->NewJSArrayWithElements(fast_elements);
219 }
220
221
222 Handle<Object> JsonParser::ParseJsonNumber() {
223 bool negative = false;
224 beg_pos_ = position_;
225 if (c0_ == '-') {
226 Advance();
227 negative = true;
228 }
229 if (c0_ == '0') {
230 Advance();
231 // Prefix zero is only allowed if it's the only digit before
232 // a decimal point or exponent.
233 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
234 } else {
235 int i = 0;
236 int digits = 0;
237 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
238 do {
239 i = i * 10 + c0_ - '0';
240 digits++;
241 Advance();
242 } while (c0_ >= '0' && c0_ <= '9');
243 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
244 number_ = (negative ? -i : i);
245 SkipWhitespace();
246 return isolate()->factory()->NewNumber(number_);
247 }
248 }
249 if (c0_ == '.') {
250 Advance();
251 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
252 do {
253 Advance();
254 } while (c0_ >= '0' && c0_ <= '9');
255 }
256 if (AsciiAlphaToLower(c0_) == 'e') {
257 Advance();
258 if (c0_ == '-' || c0_ == '+') Advance();
259 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
260 do {
261 Advance();
262 } while (c0_ >= '0' && c0_ <= '9');
263 }
264 int length = position_ - beg_pos_;
265 if (is_sequential_ascii_) {
266 Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length);
267 number_ = StringToDouble(isolate()->unicode_cache(),
268 chars,
269 NO_FLAGS, // Hex, octal or trailing junk.
270 OS::nan_value());
271 } else {
272 Vector<char> buffer = Vector<char>::New(length);
273 String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_);
274 Vector<const char> result =
275 Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
276 length);
277 number_ = StringToDouble(isolate()->unicode_cache(),
278 result,
279 NO_FLAGS, // Hex, octal or trailing junk.
280 0.0);
281 buffer.Dispose();
282 }
283 SkipWhitespace();
284 return isolate()->factory()->NewNumber(number_);
285 }
286
287 Handle<String> JsonParser::SlowScanJsonString() {
288 // The currently scanned ascii characters.
289 Handle<String> ascii(isolate()->factory()->NewSubString(source_,
290 beg_pos_,
291 position_));
292 Handle<String> two_byte =
293 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
294 NOT_TENURED);
295 Handle<SeqTwoByteString> seq_two_byte =
296 Handle<SeqTwoByteString>::cast(two_byte);
297
298 int allocation_count = 1;
299 int count = 0;
300
301 while (c0_ != '"') {
302 // Create new seq string
303 if (count >= kInitialSpecialStringSize * allocation_count) {
304 allocation_count = allocation_count * 2;
305 int new_size = allocation_count * kInitialSpecialStringSize;
306 Handle<String> new_two_byte =
307 isolate()->factory()->NewRawTwoByteString(new_size,
308 NOT_TENURED);
309 uc16* char_start =
310 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
311 String::WriteToFlat(*seq_two_byte, char_start, 0, count);
312 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
313 }
314
315 // Check for control character (0x00-0x1f) or unterminated string (<0).
316 if (c0_ < 0x20) return Handle<String>::null();
317 if (c0_ != '\\') {
318 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
319 Advance();
320 } else {
321 Advance();
322 switch (c0_) {
323 case '"':
324 case '\\':
325 case '/':
326 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
327 break;
328 case 'b':
329 seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
330 break;
331 case 'f':
332 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');
333 break;
334 case 'n':
335 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');
336 break;
337 case 'r':
338 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');
339 break;
340 case 't':
341 seq_two_byte->SeqTwoByteStringSet(count++, '\x09');
342 break;
343 case 'u': {
344 uc32 value = 0;
345 for (int i = 0; i < 4; i++) {
346 Advance();
347 int digit = HexValue(c0_);
348 if (digit < 0) {
349 return Handle<String>::null();
350 }
351 value = value * 16 + digit;
352 }
353 seq_two_byte->SeqTwoByteStringSet(count++, value);
354 break;
355 }
356 default:
357 return Handle<String>::null();
358 }
359 Advance();
360 }
361 }
362 // Advance past the last '"'.
363 ASSERT_EQ('"', c0_);
364 AdvanceSkipWhitespace();
365
366 // Shrink the the string to our length.
367 if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
368 isolate()->heap()->new_space()->
369 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte,
370 count);
371 } else {
372 int string_size = SeqTwoByteString::SizeFor(count);
373 int allocated_string_size =
374 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
375 int delta = allocated_string_size - string_size;
376 Address start_filler_object = seq_two_byte->address() + string_size;
377 seq_two_byte->set_length(count);
378 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
379 }
380 return isolate()->factory()->NewConsString(ascii, seq_two_byte);
381 }
382
383
384 template <bool is_symbol>
385 Handle<String> JsonParser::ScanJsonString() {
386 ASSERT_EQ('"', c0_);
387 Advance();
388 beg_pos_ = position_;
389 // Fast case for ascii only without escape characters.
390 while (c0_ != '"') {
391 // Check for control character (0x00-0x1f) or unterminated string (<0).
392 if (c0_ < 0x20) return Handle<String>::null();
393 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) {
394 Advance();
395 } else {
396 return SlowScanJsonString();
397 }
398 }
399 ASSERT_EQ('"', c0_);
400 end_pos_ = position_;
401 // Advance past the last '"'.
402 AdvanceSkipWhitespace();
403 if (is_sequential_ascii_ && is_symbol) {
404 return isolate()->factory()->LookupAsciiSymbol(seq_source_,
405 beg_pos_,
406 end_pos_ - beg_pos_);
407 } else {
408 return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_);
409 }
410 }
411
412 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/json-parser.h ('k') | src/runtime.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698