src/json-parser.h - Issue 7134010: Specialize JSON parser to only check for SequentialAsciiString once.

Side by Side Diff: src/json-parser.h

Issue 7134010: Specialize JSON parser to only check for SequentialAsciiString once. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 9 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 10 matching lines...) Expand all Loading...
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT	21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,	22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 #ifndef V8_JSON_PARSER_H_	28 #ifndef V8_JSON_PARSER_H_

29 #define V8_JSON_PARSER_H_	29 #define V8_JSON_PARSER_H_

30	30

	31 #include "v8.h"

	32

	33 #include "char-predicates-inl.h"

	34 #include "conversions.h"

	35 #include "messages.h"

	36 #include "spaces-inl.h"

31 #include "token.h"	37 #include "token.h"

32	38

33 namespace v8 {	39 namespace v8 {

34 namespace internal {	40 namespace internal {

35	41

36 // A simple json parser.	42 // A simple json parser.

	43 template <bool seq_ascii>

37 class JsonParser BASE_EMBEDDED {	44 class JsonParser BASE_EMBEDDED {

38 public:	45 public:

39 static Handle<Object> Parse(Handle<String> source) {	46 static Handle<Object> Parse(Handle<String> source) {

40 return JsonParser().ParseJson(source);	47 return JsonParser().ParseJson(source);

41 }	48 }

42	49

43 static const int kEndOfString = -1;	50 static const int kEndOfString = -1;

44	51

45 private:	52 private:

46 // Parse a string containing a single JSON value.	53 // Parse a string containing a single JSON value.

47 Handle<Object> ParseJson(Handle<String> source);	54 Handle<Object> ParseJson(Handle<String> source);

48	55

49 inline void Advance() {	56 inline void Advance() {

50 position_++;	57 position_++;

51 if (position_ > source_length_) {	58 if (position_ > source_length_) {

52 c0_ = kEndOfString;	59 c0_ = kEndOfString;

53 } else if (is_sequential_ascii_) {	60 } else if (seq_ascii) {

54 c0_ = seq_source_->SeqAsciiStringGet(position_);	61 c0_ = seq_source_->SeqAsciiStringGet(position_);

55 } else {	62 } else {

56 c0_ = source_->Get(position_);	63 c0_ = source_->Get(position_);

57 }	64 }

58 }	65 }

59	66

60 // The JSON lexical grammar is specified in the ECMAScript 5 standard,	67 // The JSON lexical grammar is specified in the ECMAScript 5 standard,

61 // section 15.12.1.1. The only allowed whitespace characters between tokens	68 // section 15.12.1.1. The only allowed whitespace characters between tokens

62 // are tab, carriage-return, newline and space.	69 // are tab, carriage-return, newline and space.

63	70

(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
142 inline Isolate* isolate() { return isolate_; }	149 inline Isolate* isolate() { return isolate_; }

143	150

144 static const int kInitialSpecialStringSize = 1024;	151 static const int kInitialSpecialStringSize = 1024;

145	152

146	153

147 private:	154 private:

148 Handle<String> source_;	155 Handle<String> source_;

149 int source_length_;	156 int source_length_;

150 Handle<SeqAsciiString> seq_source_;	157 Handle<SeqAsciiString> seq_source_;

151	158

152 bool is_sequential_ascii_;

153 // begin and end position of scanned string or number	159 // begin and end position of scanned string or number

154 int beg_pos_;	160 int beg_pos_;

155 int end_pos_;	161 int end_pos_;

156	162

157 Isolate* isolate_;	163 Isolate* isolate_;

158 uc32 c0_;	164 uc32 c0_;

159 int position_;	165 int position_;

160	166

161 double number_;	167 double number_;

162 };	168 };

163	169

	170 template <bool seq_ascii>

	171 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) {

	172 isolate_ = source->map()->isolate();

	173 source_ = Handle<String>(source->TryFlattenGetString());

	174 source_length_ = source_->length() - 1;

	175

	176 // Optimized fast case where we only have ascii characters.

	177 if (seq_ascii) {

	178 seq_source_ = Handle<SeqAsciiString>::cast(source_);

	179 }

	180

	181 // Set initial position right before the string.

	182 position_ = -1;

	183 // Advance to the first character (possibly EOS)

	184 AdvanceSkipWhitespace();

	185 Handle<Object> result = ParseJsonValue();

	186 if (result.is_null() || c0_ != kEndOfString) {

	187 // Parse failed. Current character is the unexpected token.

	188

	189 const char* message;

	190 Factory* factory = isolate()->factory();

	191 Handle<JSArray> array;

	192

	193 switch (c0_) {

	194 case kEndOfString:

	195 message = "unexpected_eos";

	196 array = factory->NewJSArray(0);

	197 break;

	198 case '-':

	199 case '0':

	200 case '1':

	201 case '2':

	202 case '3':

	203 case '4':

	204 case '5':

	205 case '6':

	206 case '7':

	207 case '8':

	208 case '9':

	209 message = "unexpected_token_number";

	210 array = factory->NewJSArray(0);

	211 break;

	212 case '"':

	213 message = "unexpected_token_string";

	214 array = factory->NewJSArray(0);

	215 break;

	216 default:

	217 message = "unexpected_token";

	218 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_);

	219 Handle<FixedArray> element = factory->NewFixedArray(1);

	220 element->set(0, *name);

	221 array = factory->NewJSArrayWithElements(element);

	222 break;

	223 }

	224

	225 MessageLocation location(factory->NewScript(source),

	226 position_,

	227 position_ + 1);

	228 Handle<Object> result = factory->NewSyntaxError(message, array);

	229 isolate()->Throw(*result, &location);

	230 return Handle<Object>::null();

	231 }

	232 return result;

	233 }

	234

	235

	236 // Parse any JSON value.

	237 template <bool seq_ascii>

	238 Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() {

	239 switch (c0_) {

	240 case '"':

	241 return ParseJsonString();

	242 case '-':

	243 case '0':

	244 case '1':

	245 case '2':

	246 case '3':

	247 case '4':

	248 case '5':

	249 case '6':

	250 case '7':

	251 case '8':

	252 case '9':

	253 return ParseJsonNumber();

	254 case 'f':

	255 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&

	256 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {

	257 AdvanceSkipWhitespace();

	258 return isolate()->factory()->false_value();

	259 } else {

	260 return ReportUnexpectedCharacter();

	261 }

	262 case 't':

	263 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&

	264 AdvanceGetChar() == 'e') {

	265 AdvanceSkipWhitespace();

	266 return isolate()->factory()->true_value();

	267 } else {

	268 return ReportUnexpectedCharacter();

	269 }

	270 case 'n':

	271 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&

	272 AdvanceGetChar() == 'l') {

	273 AdvanceSkipWhitespace();

	274 return isolate()->factory()->null_value();

	275 } else {

	276 return ReportUnexpectedCharacter();

	277 }

	278 case '{':

	279 return ParseJsonObject();

	280 case '[':

	281 return ParseJsonArray();

	282 default:

	283 return ReportUnexpectedCharacter();

	284 }

	285 }

	286

	287

	288 // Parse a JSON object. Position must be right at '{'.

	289 template <bool seq_ascii>

	290 Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() {

	291 Handle<JSFunction> object_constructor(

	292 isolate()->global_context()->object_function());

	293 Handle<JSObject> json_object =

	294 isolate()->factory()->NewJSObject(object_constructor);

	295 ASSERT_EQ(c0_, '{');

	296

	297 AdvanceSkipWhitespace();

	298 if (c0_ != '}') {

	299 do {

	300 if (c0_ != '"') return ReportUnexpectedCharacter();

	301 Handle<String> key = ParseJsonSymbol();

	302 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();

	303 AdvanceSkipWhitespace();

	304 Handle<Object> value = ParseJsonValue();

	305 if (value.is_null()) return ReportUnexpectedCharacter();

	306

	307 uint32_t index;

	308 if (key->AsArrayIndex(&index)) {

	309 SetOwnElement(json_object, index, value, kNonStrictMode);

	310 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {

	311 SetPrototype(json_object, value);

	312 } else {

	313 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);

	314 }

	315 } while (MatchSkipWhiteSpace(','));

	316 if (c0_ != '}') {

	317 return ReportUnexpectedCharacter();

	318 }

	319 }

	320 AdvanceSkipWhitespace();

	321 return json_object;

	322 }

	323

	324 // Parse a JSON array. Position must be right at '['.

	325 template <bool seq_ascii>

	326 Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() {

	327 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);

	328 ZoneList<Handle<Object> > elements(4);

	329 ASSERT_EQ(c0_, '[');

	330

	331 AdvanceSkipWhitespace();

	332 if (c0_ != ']') {

	333 do {

	334 Handle<Object> element = ParseJsonValue();

	335 if (element.is_null()) return ReportUnexpectedCharacter();

	336 elements.Add(element);

	337 } while (MatchSkipWhiteSpace(','));

	338 if (c0_ != ']') {

	339 return ReportUnexpectedCharacter();

	340 }

	341 }

	342 AdvanceSkipWhitespace();

	343 // Allocate a fixed array with all the elements.

	344 Handle<FixedArray> fast_elements =

	345 isolate()->factory()->NewFixedArray(elements.length());

	346 for (int i = 0, n = elements.length(); i < n; i++) {

	347 fast_elements->set(i, *elements[i]);

	348 }

	349 return isolate()->factory()->NewJSArrayWithElements(fast_elements);

	350 }

	351

	352

	353 template <bool seq_ascii>

	354 Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() {

	355 bool negative = false;

	356 beg_pos_ = position_;

	357 if (c0_ == '-') {

	358 Advance();

	359 negative = true;

	360 }

	361 if (c0_ == '0') {

	362 Advance();

	363 // Prefix zero is only allowed if it's the only digit before

	364 // a decimal point or exponent.

	365 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();

	366 } else {

	367 int i = 0;

	368 int digits = 0;

	369 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();

	370 do {

	371 i = i * 10 + c0_ - '0';

	372 digits++;

	373 Advance();

	374 } while (c0_ >= '0' && c0_ <= '9');

	375 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {

	376 number_ = (negative ? -i : i);

	377 SkipWhitespace();

	378 return isolate()->factory()->NewNumber(number_);

	379 }

	380 }

	381 if (c0_ == '.') {

	382 Advance();

	383 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();

	384 do {

	385 Advance();

	386 } while (c0_ >= '0' && c0_ <= '9');

	387 }

	388 if (AsciiAlphaToLower(c0_) == 'e') {

	389 Advance();

	390 if (c0_ == '-' || c0_ == '+') Advance();

	391 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();

	392 do {

	393 Advance();

	394 } while (c0_ >= '0' && c0_ <= '9');

	395 }

	396 int length = position_ - beg_pos_;

	397 if (seq_ascii) {

	398 Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length);

	399 number_ = StringToDouble(isolate()->unicode_cache(),

	400 chars,

	401 NO_FLAGS, // Hex, octal or trailing junk.

	402 OS::nan_value());

	403 } else {

	404 Vector<char> buffer = Vector<char>::New(length);

	405 String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_);

	406 Vector<const char> result =

	407 Vector<const char>(reinterpret_cast<const char*>(buffer.start()),

	408 length);

	409 number_ = StringToDouble(isolate()->unicode_cache(),

	410 result,

	411 NO_FLAGS, // Hex, octal or trailing junk.

	412 0.0);

	413 buffer.Dispose();

	414 }

	415 SkipWhitespace();

	416 return isolate()->factory()->NewNumber(number_);

	417 }

	418

	419 template <bool seq_ascii>

	420 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString() {

	421 // The currently scanned ascii characters.

	422 Handle<String> ascii(isolate()->factory()->NewSubString(source_,

	423 beg_pos_,

	424 position_));

	425 Handle<String> two_byte =

	426 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,

	427 NOT_TENURED);

	428 Handle<SeqTwoByteString> seq_two_byte =

	429 Handle<SeqTwoByteString>::cast(two_byte);

	430

	431 int allocation_count = 1;

	432 int count = 0;

	433

	434 while (c0_ != '"') {

	435 // Create new seq string

	436 if (count >= kInitialSpecialStringSize * allocation_count) {

	437 allocation_count = allocation_count * 2;

	438 int new_size = allocation_count * kInitialSpecialStringSize;

	439 Handle<String> new_two_byte =

	440 isolate()->factory()->NewRawTwoByteString(new_size,

	441 NOT_TENURED);

	442 uc16* char_start =

	443 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();

	444 String::WriteToFlat(*seq_two_byte, char_start, 0, count);

	445 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);

	446 }

	447

	448 // Check for control character (0x00-0x1f) or unterminated string (<0).

	449 if (c0_ < 0x20) return Handle<String>::null();

	450 if (c0_ != '\\') {

	451 seq_two_byte->SeqTwoByteStringSet(count++, c0_);

	452 Advance();

	453 } else {

	454 Advance();

	455 switch (c0_) {

	456 case '"':

	457 case '\\':

	458 case '/':

	459 seq_two_byte->SeqTwoByteStringSet(count++, c0_);

	460 break;

	461 case 'b':

	462 seq_two_byte->SeqTwoByteStringSet(count++, '\x08');

	463 break;

	464 case 'f':

	465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');

	466 break;

	467 case 'n':

	468 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');

	469 break;

	470 case 'r':

	471 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');

	472 break;

	473 case 't':

	474 seq_two_byte->SeqTwoByteStringSet(count++, '\x09');

	475 break;

	476 case 'u': {

	477 uc32 value = 0;

	478 for (int i = 0; i < 4; i++) {

	479 Advance();

	480 int digit = HexValue(c0_);

	481 if (digit < 0) {

	482 return Handle<String>::null();

	483 }

	484 value = value * 16 + digit;

	485 }

	486 seq_two_byte->SeqTwoByteStringSet(count++, value);

	487 break;

	488 }

	489 default:

	490 return Handle<String>::null();

	491 }

	492 Advance();

	493 }

	494 }

	495 // Advance past the last '"'.

	496 ASSERT_EQ('"', c0_);

	497 AdvanceSkipWhitespace();

	498

	499 // Shrink the string to our length.

	500 if (isolate()->heap()->InNewSpace(*seq_two_byte)) {

	501 isolate()->heap()->new_space()->

	502 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>(

	503 *seq_two_byte, count);

	504 } else {

	505 int string_size = SeqTwoByteString::SizeFor(count);

	506 int allocated_string_size =

	507 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);

	508 int delta = allocated_string_size - string_size;

	509 Address start_filler_object = seq_two_byte->address() + string_size;

	510 seq_two_byte->set_length(count);

	511 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);

	512 }

	513 return isolate()->factory()->NewConsString(ascii, seq_two_byte);

	514 }

	515

	516 template <bool seq_ascii>

	517 template <bool is_symbol>

	518 Handle<String> JsonParser<seq_ascii>::ScanJsonString() {

	519 ASSERT_EQ('"', c0_);

	520 Advance();

	521 beg_pos_ = position_;

	522 // Fast case for ascii only without escape characters.

	523 while (c0_ != '"') {

	524 // Check for control character (0x00-0x1f) or unterminated string (<0).

	525 if (c0_ < 0x20) return Handle<String>::null();

	526 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) {

	527 Advance();

	528 } else {

	529 return this->SlowScanJsonString();

	530 }

	531 }

	532 ASSERT_EQ('"', c0_);

	533 end_pos_ = position_;

	534 // Advance past the last '"'.

	535 AdvanceSkipWhitespace();

	536 if (seq_ascii && is_symbol) {

	537 return isolate()->factory()->LookupAsciiSymbol(seq_source_,

	538 beg_pos_,

	539 end_pos_ - beg_pos_);

	540 } else {

	541 return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_);

	542 }

	543 }

	544

164 } } // namespace v8::internal	545 } } // namespace v8::internal

165	546

166 #endif // V8_JSON_PARSER_H_	547 #endif // V8_JSON_PARSER_H_

OLD	NEW

« no previous file with comments | « src/SConscript ('k') | src/json-parser.cc » ('j') | no next file with comments »