src/json-parser.cc - Issue 7020018: Remove scanner abstraction layer from JSON parsing. The change in api.cc is t...

Side by Side Diff: src/json-parser.cc

Issue 7020018: Remove scanner abstraction layer from JSON parsing. The change in api.cc is t... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 9 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
46 if (source_->IsSeqAsciiString()) {	46 if (source_->IsSeqAsciiString()) {

47 is_sequential_ascii_ = true;	47 is_sequential_ascii_ = true;

48 seq_source_ = Handle<SeqAsciiString>::cast(source_);	48 seq_source_ = Handle<SeqAsciiString>::cast(source_);

49 } else {	49 } else {

50 is_sequential_ascii_ = false;	50 is_sequential_ascii_ = false;

51 }	51 }

52	52

53 // Set initial position right before the string.	53 // Set initial position right before the string.

54 position_ = -1;	54 position_ = -1;

55 // Advance to the first character (posibly EOS)	55 // Advance to the first character (posibly EOS)

56 Advance();	56 AdvanceWS();

57 Next();

58 Handle<Object> result = ParseJsonValue();	57 Handle<Object> result = ParseJsonValue();

59 if (result.is_null() \|\| Next() != Token::EOS) {	58 if (result.is_null() \|\| c0_ != kEndOfString) {

60 // Parse failed. Scanner's current token is the unexpected token.	59 // Parse failed. Current character is the unexpected token.

61 Token::Value token = current_.token;

62	60

63 const char* message;	61 const char* message;

64 const char* name_opt = NULL;

65	62

66 switch (token) {	63 switch (c0_) {

67 case Token::EOS:	64 case kEndOfString:

68 message = "unexpected_eos";	65 message = "unexpected_eos";

69 break;	66 break;

70 case Token::NUMBER:	67 case '-':

	68 case '0':

	69 case '1':

	70 case '2':

	71 case '3':

	72 case '4':

	73 case '5':

	74 case '6':

	75 case '7':

	76 case '8':

	77 case '9':

71 message = "unexpected_token_number";	78 message = "unexpected_token_number";

72 break;	79 break;

73 case Token::STRING:	80 case '"':

74 message = "unexpected_token_string";	81 message = "unexpected_token_string";
	Lasse Reichstein 2011/06/01 11:03:58: This might change the behavior for unterminated strings, but I guess that's acceptable. sandholm 2011/06/01 13:45:20 (in reply): Done.
75 break;	82 break;

76 case Token::IDENTIFIER:

77 case Token::FUTURE_RESERVED_WORD:

78 message = "unexpected_token_identifier";

79 break;

80 default:	83 default:
	Lasse Reichstein 2011/06/01 11:03:58: Agree, it's probably better to not have the unexpected_token_identifier message for JSON. sandholm 2011/06/01 13:45:20 (in reply): Done.
81 message = "unexpected_token";	84 message = "unexpected_token";
	Lasse Reichstein 2011/06/01 11:03:58: The unexpected_token message needs/expects a second argument. See messages.js. sandholm 2011/06/01 13:45:20 (in reply): Done.
82 name_opt = Token::String(token);

83 ASSERT(name_opt != NULL);

84 break;	85 break;

85 }	86 }

86	87

87 Factory* factory = isolate()->factory();	88 Factory* factory = isolate()->factory();

88 MessageLocation location(factory->NewScript(source),	89 MessageLocation location(factory->NewScript(source),

89 current_.beg_pos,	90 position_,

90 current_.end_pos);	91 position_ + 1);

91 Handle<JSArray> array;	92 Handle<JSArray> array = factory->NewJSArray(0);

92 if (name_opt == NULL) {

93 array = factory->NewJSArray(0);

94 } else {

95 Handle<String> name = factory->NewStringFromUtf8(CStrVector(name_opt));

96 Handle<FixedArray> element = factory->NewFixedArray(1);

97 element->set(0, *name);

98 array = factory->NewJSArrayWithElements(element);

99 }

100 Handle<Object> result = factory->NewSyntaxError(message, array);	93 Handle<Object> result = factory->NewSyntaxError(message, array);

101 isolate()->Throw(*result, &location);	94 isolate()->Throw(*result, &location);

102 return Handle<Object>::null();	95 return Handle<Object>::null();

103 }	96 }

104 return result;	97 return result;

105 }	98 }

106	99

107	100

108 // Parse any JSON value.	101 // Parse any JSON value.

109 Handle<Object> JsonParser::ParseJsonValue() {	102 Handle<Object> JsonParser::ParseJsonValue() {

110 Token::Value token = Next();	103 switch (c0_) {

111 switch (token) {	104 case '"':

112 case Token::STRING:	105 return ParseJsonString();

113 return GetString(false);	106 case '-':

114 case Token::NUMBER:	107 case '0':

115 return isolate()->factory()->NewNumber(number_);	108 case '1':

116 case Token::FALSE_LITERAL:	109 case '2':

117 return isolate()->factory()->false_value();	110 case '3':

118 case Token::TRUE_LITERAL:	111 case '4':

119 return isolate()->factory()->true_value();	112 case '5':

120 case Token::NULL_LITERAL:	113 case '6':

121 return isolate()->factory()->null_value();	114 case '7':

122 case Token::LBRACE:	115 case '8':

	116 case '9':

	117 return ParseJsonNumber();

	118 case 'f':

	119 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&

	120 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {

	121 AdvanceWS();
	Lasse Reichstein 2011/06/01 11:03:58: Don't use AdvanceWS here, just check for non-identifier-part of c0_ after matching 'e', and let the main scan loop skip whitespace. Make sure that the cursor position is correct (pointing to the 'f') when reporting the incorrect identifier. sandholm 2011/06/01 13:45:20 (in reply): I think this is fine. The invariant is that we're at the next non-whitespace char. Also, I think it's fine to use whatever position we're at when failing. E.g. with "falsse" we would report the error here: fals(s)e.
	122 return isolate()->factory()->false_value();

	123 } else {

	124 return ReportUnexpectedToken();
	Lasse Reichstein 2011/06/01 11:03:58: Maybe change the name, now that we don't use Token values. sandholm 2011/06/01 13:45:20 (in reply): Done.
	125 }

	126 case 't':

	127 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&

	128 AdvanceGetChar() == 'e') {

	129 AdvanceWS();
	Lasse Reichstein 2011/06/01 11:03:58: As above. sandholm 2011/06/01 13:45:20 (in reply): ditto
	130 return isolate()->factory()->true_value();

	131 } else {

	132 return ReportUnexpectedToken();

	133 }

	134 case 'n':

	135 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&

	136 AdvanceGetChar() == 'l') {

	137 AdvanceWS();
	Lasse Reichstein 2011/06/01 11:03:58: And again. sandholm 2011/06/01 13:45:20 (in reply): ditto
	138 return isolate()->factory()->null_value();

	139 } else {

	140 return ReportUnexpectedToken();

	141 }

	142 case '{':

123 return ParseJsonObject();	143 return ParseJsonObject();

124 case Token::LBRACK:	144 case '[':

125 return ParseJsonArray();	145 return ParseJsonArray();

126 default:	146 default:

127 return ReportUnexpectedToken();	147 return ReportUnexpectedToken();

128 }	148 }

129 }	149 }

130	150

131	151

132 // Parse a JSON object. Scanner must be right after '{' token.	152 // Parse a JSON object. Position must be right at '{'.

133 Handle<Object> JsonParser::ParseJsonObject() {	153 Handle<Object> JsonParser::ParseJsonObject() {

134 Handle<JSFunction> object_constructor(	154 Handle<JSFunction> object_constructor(

135 isolate()->global_context()->object_function());	155 isolate()->global_context()->object_function());

136 Handle<JSObject> json_object =	156 Handle<JSObject> json_object =

137 isolate()->factory()->NewJSObject(object_constructor);	157 isolate()->factory()->NewJSObject(object_constructor);

138	158

139 if (Peek() == Token::RBRACE) {	159 AdvanceWS();

140 Next();	160 if (c0_ != '}') {

141 } else {

142 do {	161 do {

143 if (Next() != Token::STRING) {	162 Handle<String> key = ParseJsonSymbol();

144 return ReportUnexpectedToken();	163 if (key.is_null() \|\| c0_ != ':') return ReportUnexpectedToken();

145 }	164 AdvanceWS();

146 Handle<String> key = GetString(true);

147 if (Next() != Token::COLON) {

148 return ReportUnexpectedToken();

149 }

150

151 Handle<Object> value = ParseJsonValue();	165 Handle<Object> value = ParseJsonValue();

152 if (value.is_null()) return Handle<Object>::null();	166 if (value.is_null()) return ReportUnexpectedToken();

153	167

154 uint32_t index;	168 uint32_t index;

155 if (key->AsArrayIndex(&index)) {	169 if (key->AsArrayIndex(&index)) {

156 SetOwnElement(json_object, index, value, kNonStrictMode);	170 SetOwnElement(json_object, index, value, kNonStrictMode);

157 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {	171 } else if (key->Equals(isolate()->heap()->Proto_symbol())) {

158 SetPrototype(json_object, value);	172 SetPrototype(json_object, value);

159 } else {	173 } else {

160 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);	174 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE);

161 }	175 }

162 } while (Next() == Token::COMMA);	176 } while (c0_ == ',' && AdvanceWS());
	Lasse Reichstein 2011/06/01 11:03:58: Ah, so that's why AdvanceWS returns true. Don't do that! In general, don't have side effects in condition expressions. sandholm 2011/06/01 13:45:20 (in reply): Fixed with an AdvanceWhiteSpacesOnlyIfMatch(',') call.
163 if (current_.token != Token::RBRACE) {	177 if (c0_ != '}') {

164 return ReportUnexpectedToken();	178 return ReportUnexpectedToken();

165 }	179 }

166 }	180 }

	181 AdvanceWS();

167 return json_object;	182 return json_object;

168 }	183 }

169	184

170 // Parse a JSON array. Scanner must be right after '[' token.	185 // Parse a JSON array. Position must be right at '['.

171 Handle<Object> JsonParser::ParseJsonArray() {	186 Handle<Object> JsonParser::ParseJsonArray() {
	Lasse Reichstein 2011/06/01 11:03:58: If position must be at '[', do ASSERT_EQ(c0_, '['); sandholm 2011/06/01 13:45:20 (in reply): Done.
172 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);	187 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);

173 ZoneList<Handle<Object> > elements(4);	188 ZoneList<Handle<Object> > elements(4);

174	189

175 Token::Value token = Peek();	190 AdvanceWS();

176 if (token == Token::RBRACK) {	191 if (c0_ != ']') {

177 Next();

178 } else {

179 do {	192 do {

180 Handle<Object> element = ParseJsonValue();	193 Handle<Object> element = ParseJsonValue();

181 if (element.is_null()) return Handle<Object>::null();	194 if (element.is_null()) return ReportUnexpectedToken();

182 elements.Add(element);	195 elements.Add(element);

183 token = Next();	196 } while (c0_ == ',' && AdvanceWS());

184 } while (token == Token::COMMA);	197 if (c0_ != ']') {

185 if (token != Token::RBRACK) {

186 return ReportUnexpectedToken();	198 return ReportUnexpectedToken();

187 }	199 }

188 }	200 }

189	201 AdvanceWS();

190 // Allocate a fixed array with all the elements.	202 // Allocate a fixed array with all the elements.

191 Handle<FixedArray> fast_elements =	203 Handle<FixedArray> fast_elements =

192 isolate()->factory()->NewFixedArray(elements.length());	204 isolate()->factory()->NewFixedArray(elements.length());

193

194 for (int i = 0, n = elements.length(); i < n; i++) {	205 for (int i = 0, n = elements.length(); i < n; i++) {

195 fast_elements->set(i, *elements[i]);	206 fast_elements->set(i, *elements[i]);

196 }	207 }

197

198 return isolate()->factory()->NewJSArrayWithElements(fast_elements);	208 return isolate()->factory()->NewJSArrayWithElements(fast_elements);

199 }	209 }

200	210

201	211

202 Token::Value JsonParser::Next() {	212 Handle<Object> JsonParser::ParseJsonNumber() {

203 current_ = next_;

204 ScanJson();

205 return current_.token;

206 }

207

208 void JsonParser::ScanJson() {

209 if (source_->IsSeqAsciiString()) {

210 is_sequential_ascii_ = true;

211 } else {

212 is_sequential_ascii_ = false;

213 }

214

215 Token::Value token;

216 do {

217 // Remember the position of the next token

218 next_.beg_pos = position_;

219 switch (c0_) {

220 case '\t':

221 case '\r':

222 case '\n':

223 case ' ':

224 Advance();

225 token = Token::WHITESPACE;

226 break;

227 case '{':

228 Advance();

229 token = Token::LBRACE;

230 break;

231 case '}':

232 Advance();

233 token = Token::RBRACE;

234 break;

235 case '[':

236 Advance();

237 token = Token::LBRACK;

238 break;

239 case ']':

240 Advance();

241 token = Token::RBRACK;

242 break;

243 case ':':

244 Advance();

245 token = Token::COLON;

246 break;

247 case ',':

248 Advance();

249 token = Token::COMMA;

250 break;

251 case '"':

252 token = ScanJsonString();

253 break;

254 case '-':

255 case '0':

256 case '1':

257 case '2':

258 case '3':

259 case '4':

260 case '5':

261 case '6':

262 case '7':

263 case '8':

264 case '9':

265 token = ScanJsonNumber();

266 break;

267 case 't':

268 token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);

269 break;

270 case 'f':

271 token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);

272 break;

273 case 'n':

274 token = ScanJsonIdentifier("null", Token::NULL_LITERAL);

275 break;

276 default:

277 if (c0_ < 0) {

278 Advance();

279 token = Token::EOS;

280 } else {

281 Advance();

282 token = Token::ILLEGAL;

283 }

284 }

285 } while (token == Token::WHITESPACE);

286

287 next_.end_pos = position_;

288 next_.token = token;

289 }

290

291

292 Token::Value JsonParser::ScanJsonIdentifier(const char* text,

293 Token::Value token) {

294 while (*text != '\0') {

295 if (c0_ != *text) return Token::ILLEGAL;

296 Advance();

297 text++;

298 }

299 return token;

300 }

301

302

303 Token::Value JsonParser::ScanJsonNumber() {

304 bool negative = false;	213 bool negative = false;

305	214 beg_pos_ = position_;

306 if (c0_ == '-') {	215 if (c0_ == '-') {

307 Advance();	216 Advance();

308 negative = true;	217 negative = true;

309 }	218 }

310 if (c0_ == '0') {	219 if (c0_ == '0') {

311 Advance();	220 Advance();

312 // Prefix zero is only allowed if it's the only digit before	221 // Prefix zero is only allowed if it's the only digit before

313 // a decimal point or exponent.	222 // a decimal point or exponent.

314 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;	223 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedToken();

315 } else {	224 } else {

316 int i = 0;	225 int i = 0;

317 int digits = 0;	226 int digits = 0;

318 if (c0_ < '1' \|\| c0_ > '9') return Token::ILLEGAL;	227 if (c0_ < '1' \|\| c0_ > '9') return ReportUnexpectedToken();

319 do {	228 do {

320 i = i * 10 + c0_ - '0';	229 i = i * 10 + c0_ - '0';

321 digits++;	230 digits++;

322 Advance();	231 Advance();

323 } while (c0_ >= '0' && c0_ <= '9');	232 } while (c0_ >= '0' && c0_ <= '9');

324 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {	233 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {

325 number_ = (negative ? -i : i);	234 number_ = (negative ? -i : i);

326 return Token::NUMBER;	235 SkipWS();

	236 return isolate()->factory()->NewNumber(number_);

327 }	237 }

328 }	238 }

329 if (c0_ == '.') {	239 if (c0_ == '.') {

330 Advance();	240 Advance();

331 if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;	241 if (c0_ < '0' \|\| c0_ > '9') return ReportUnexpectedToken();

332 do {	242 do {

333 Advance();	243 Advance();

334 } while (c0_ >= '0' && c0_ <= '9');	244 } while (c0_ >= '0' && c0_ <= '9');

335 }	245 }

336 if (AsciiAlphaToLower(c0_) == 'e') {	246 if (AsciiAlphaToLower(c0_) == 'e') {

337 Advance();	247 Advance();

338 if (c0_ == '-' \|\| c0_ == '+') Advance();	248 if (c0_ == '-' \|\| c0_ == '+') Advance();

339 if (c0_ < '0' \|\| c0_ > '9') return Token::ILLEGAL;	249 if (c0_ < '0' \|\| c0_ > '9') return ReportUnexpectedToken();

340 do {	250 do {

341 Advance();	251 Advance();

342 } while (c0_ >= '0' && c0_ <= '9');	252 } while (c0_ >= '0' && c0_ <= '9');

343 }	253 }

	254 int length = position_ - beg_pos_;

344 if (is_sequential_ascii_) {	255 if (is_sequential_ascii_) {

345 Vector<const char> chars(seq_source_->GetChars() + next_.beg_pos,	256 Vector<const char> chars(seq_source_->GetChars() + beg_pos_, length);

346 position_ - next_.beg_pos);

347 number_ = StringToDouble(isolate()->unicode_cache(),	257 number_ = StringToDouble(isolate()->unicode_cache(),

348 chars,	258 chars,

349 NO_FLAGS, // Hex, octal or trailing junk.	259 NO_FLAGS, // Hex, octal or trailing junk.

350 OS::nan_value());	260 OS::nan_value());

351 } else {	261 } else {

352 Vector<char> buffer = Vector<char>::New(position_ - next_.beg_pos);	262 Vector<char> buffer = Vector<char>::New(length);

353 String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_);	263 String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_);

354 Vector<const char> result =	264 Vector<const char> result =

355 Vector<const char>(reinterpret_cast<const char*>(buffer.start()),	265 Vector<const char>(reinterpret_cast<const char*>(buffer.start()),

356 position_ - next_.beg_pos);	266 length);

357 number_ = StringToDouble(isolate()->unicode_cache(),	267 number_ = StringToDouble(isolate()->unicode_cache(),

358 result,	268 result,

359 NO_FLAGS, // Hex, octal or trailing junk.	269 NO_FLAGS, // Hex, octal or trailing junk.

360 0.0);	270 0.0);

361 buffer.Dispose();	271 buffer.Dispose();

362 }	272 }

363 return Token::NUMBER;	273 SkipWS();

	274 return isolate()->factory()->NewNumber(number_);

364 }	275 }

365	276

366 Token::Value JsonParser::SlowScanJsonString() {	277 Handle<Object> JsonParser::SlowScanJsonString() {

367 // The currently scanned ascii characters.	278 // The currently scanned ascii characters.

368 Handle<String> ascii(isolate()->factory()->NewSubString(source_,	279 Handle<String> ascii(isolate()->factory()->NewSubString(source_,

369 next_.beg_pos + 1,	280 beg_pos_,

370 position_));	281 position_));

371 Handle<String> two_byte =	282 Handle<String> two_byte =

372 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,	283 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,

373 NOT_TENURED);	284 NOT_TENURED);

374 Handle<SeqTwoByteString> seq_two_byte =	285 Handle<SeqTwoByteString> seq_two_byte =

375 Handle<SeqTwoByteString>::cast(two_byte);	286 Handle<SeqTwoByteString>::cast(two_byte);

376	287

377 int allocation_count = 1;	288 int allocation_count = 1;

378 int count = 0;	289 int count = 0;

379	290

380 while (c0_ != '"') {	291 while (c0_ != '"') {

381 // Create new seq string	292 // Create new seq string

382 if (count >= kInitialSpecialStringSize * allocation_count) {	293 if (count >= kInitialSpecialStringSize * allocation_count) {

383 allocation_count = allocation_count * 2;	294 allocation_count = allocation_count * 2;

384 int new_size = allocation_count * kInitialSpecialStringSize;	295 int new_size = allocation_count * kInitialSpecialStringSize;

385 Handle<String> new_two_byte =	296 Handle<String> new_two_byte =

386 isolate()->factory()->NewRawTwoByteString(new_size,	297 isolate()->factory()->NewRawTwoByteString(new_size,

387 NOT_TENURED);	298 NOT_TENURED);

388 uc16* char_start =	299 uc16* char_start =

389 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();	300 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();

390 String::WriteToFlat(*seq_two_byte, char_start, 0, count);	301 String::WriteToFlat(*seq_two_byte, char_start, 0, count);

391 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);	302 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);

392 }	303 }

393	304

394 // Check for control character (0x00-0x1f) or unterminated string (<0).	305 // Check for control character (0x00-0x1f) or unterminated string (<0).

395 if (c0_ < 0x20) return Token::ILLEGAL;	306 if (c0_ < 0x20) return ReportUnexpectedToken();

396 if (c0_ != '\\') {	307 if (c0_ != '\\') {

397 seq_two_byte->SeqTwoByteStringSet(count++, c0_);	308 seq_two_byte->SeqTwoByteStringSet(count++, c0_);

398 Advance();	309 Advance();

399 } else {	310 } else {

400 Advance();	311 Advance();

401 switch (c0_) {	312 switch (c0_) {

402 case '"':	313 case '"':

403 case '\\':	314 case '\\':

404 case '/':	315 case '/':

405 seq_two_byte->SeqTwoByteStringSet(count++, c0_);	316 seq_two_byte->SeqTwoByteStringSet(count++, c0_);

(...skipping 12 matching lines...) Expand all Loading...
418 break;	329 break;

419 case 't':	330 case 't':

420 seq_two_byte->SeqTwoByteStringSet(count++, '\x09');	331 seq_two_byte->SeqTwoByteStringSet(count++, '\x09');

421 break;	332 break;

422 case 'u': {	333 case 'u': {

423 uc32 value = 0;	334 uc32 value = 0;

424 for (int i = 0; i < 4; i++) {	335 for (int i = 0; i < 4; i++) {

425 Advance();	336 Advance();

426 int digit = HexValue(c0_);	337 int digit = HexValue(c0_);

427 if (digit < 0) {	338 if (digit < 0) {

428 return Token::ILLEGAL;	339 return ReportUnexpectedToken();

429 }	340 }

430 value = value * 16 + digit;	341 value = value * 16 + digit;

431 }	342 }

432 seq_two_byte->SeqTwoByteStringSet(count++, value);	343 seq_two_byte->SeqTwoByteStringSet(count++, value);

433 break;	344 break;

434 }	345 }

435 default:	346 default:

436 return Token::ILLEGAL;	347 return ReportUnexpectedToken();

437 }	348 }

438 Advance();	349 Advance();

439 }	350 }

440 }	351 }

441 // Advance past the last '"'.	352 // Advance past the last '"'.

442 ASSERT_EQ('"', c0_);	353 ASSERT_EQ('"', c0_);

443 Advance();	354 AdvanceWS();

444	355

445 // Shrink the the string to our length.	356 // Shrink the the string to our length.

446 if (isolate()->heap()->InNewSpace(*seq_two_byte)) {	357 if (isolate()->heap()->InNewSpace(*seq_two_byte)) {

447 isolate()->heap()->new_space()->	358 isolate()->heap()->new_space()->

448 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte,	359 ShrinkStringAtAllocationBoundary<SeqTwoByteString>(*seq_two_byte,

449 count);	360 count);

450 } else {	361 } else {

451 int string_size = SeqTwoByteString::SizeFor(count);	362 int string_size = SeqTwoByteString::SizeFor(count);

452 int allocated_string_size =	363 int allocated_string_size =

453 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);	364 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);

454 int delta = allocated_string_size - string_size;	365 int delta = allocated_string_size - string_size;

455 Address start_filler_object = seq_two_byte->address() + string_size;	366 Address start_filler_object = seq_two_byte->address() + string_size;

456 seq_two_byte->set_length(count);	367 seq_two_byte->set_length(count);

457 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);	368 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);

458 }	369 }

459 string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte);	370 return isolate()->factory()->NewConsString(ascii, seq_two_byte);

460 return Token::STRING;

461 }	371 }

462	372

463	373

464 Token::Value JsonParser::ScanJsonString() {	374 template <bool is_symbol>

	375 Handle<Object> JsonParser::ScanJsonString() {

465 ASSERT_EQ('"', c0_);	376 ASSERT_EQ('"', c0_);

466 // Set string_val to null. If string_val is not set we assume an

467 // ascii string begining at next_.beg_pos + 1 to next_.end_pos - 1.

468 string_val_ = Handle<String>::null();

469 Advance();	377 Advance();

	378 beg_pos_ = position_;

470 // Fast case for ascii only without escape characters.	379 // Fast case for ascii only without escape characters.

471 while (c0_ != '"') {	380 while (c0_ != '"') {

472 // Check for control character (0x00-0x1f) or unterminated string (<0).	381 // Check for control character (0x00-0x1f) or unterminated string (<0).

473 if (c0_ < 0x20) return Token::ILLEGAL;	382 if (c0_ < 0x20) return ReportUnexpectedToken();

474 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) {	383 if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) {

475 Advance();	384 Advance();

476 } else {	385 } else {

477 return SlowScanJsonString();	386 return SlowScanJsonString();

478 }	387 }

479 }	388 }

480 ASSERT_EQ('"', c0_);	389 ASSERT_EQ('"', c0_);

	390 end_pos_ = position_;

481 // Advance past the last '"'.	391 // Advance past the last '"'.

482 Advance();	392 AdvanceWS();

483 return Token::STRING;	393 if (is_sequential_ascii_ && is_symbol) {

484 }

485

486 Handle<String> JsonParser::GetString() {

487 return GetString(false);

488 }

489

490 Handle<String> JsonParser::GetSymbol() {

491 Handle<String> result = GetString(true);

492 if (result->IsSymbol()) return result;

493 return isolate()->factory()->LookupSymbol(result);

494 }

495

496 Handle<String> JsonParser::GetString(bool hint_symbol) {

497 // We have a non ascii string, return that.

498 if (!string_val_.is_null()) return string_val_;

499

500 if (is_sequential_ascii_ && hint_symbol) {

501 Handle<SeqAsciiString> seq = Handle<SeqAsciiString>::cast(source_);

502 // The current token includes the '"' in both ends.

503 int length = current_.end_pos - current_.beg_pos - 2;

504 return isolate()->factory()->LookupAsciiSymbol(seq_source_,	394 return isolate()->factory()->LookupAsciiSymbol(seq_source_,

505 current_.beg_pos + 1,	395 beg_pos_,

506 length);	396 end_pos_ - beg_pos_);

	397 } else {

	398 return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_);

507 }	399 }

508 // The current token includes the '"' in both ends.

509 return isolate()->factory()->NewSubString(

510 source_, current_.beg_pos + 1, current_.end_pos - 1);

511 }	400 }

512	401

513 } } // namespace v8::internal	402 } } // namespace v8::internal

OLD	NEW

« src/json-parser.h ('K') | « src/json-parser.h ('k') | no next file » | no next file with comments »