| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | 27 |
| 28 #ifndef V8_JSON_PARSER_H_ | 28 #ifndef V8_JSON_PARSER_H_ |
| 29 #define V8_JSON_PARSER_H_ | 29 #define V8_JSON_PARSER_H_ |
| 30 | 30 |
| 31 #include "v8.h" |
| 32 |
| 33 #include "char-predicates-inl.h" |
| 34 #include "v8conversions.h" |
| 35 #include "messages.h" |
| 36 #include "spaces-inl.h" |
| 31 #include "token.h" | 37 #include "token.h" |
| 32 | 38 |
| 33 namespace v8 { | 39 namespace v8 { |
| 34 namespace internal { | 40 namespace internal { |
| 35 | 41 |
| 36 // A simple json parser. | 42 // A simple json parser. |
| 43 template <bool seq_ascii> |
| 37 class JsonParser BASE_EMBEDDED { | 44 class JsonParser BASE_EMBEDDED { |
| 38 public: | 45 public: |
| 39 static Handle<Object> Parse(Handle<String> source) { | 46 static Handle<Object> Parse(Handle<String> source) { |
| 40 return JsonParser().ParseJson(source); | 47 return JsonParser().ParseJson(source); |
| 41 } | 48 } |
| 42 | 49 |
| 43 static const int kEndOfString = -1; | 50 static const int kEndOfString = -1; |
| 44 | 51 |
| 45 private: | 52 private: |
| 46 // Parse a string containing a single JSON value. | 53 // Parse a string containing a single JSON value. |
| 47 Handle<Object> ParseJson(Handle<String> source); | 54 Handle<Object> ParseJson(Handle<String> source); |
| 48 | 55 |
| 49 inline void Advance() { | 56 inline void Advance() { |
| 57 position_++; |
| 50 if (position_ >= source_length_) { | 58 if (position_ >= source_length_) { |
| 51 position_++; | |
| 52 c0_ = kEndOfString; | 59 c0_ = kEndOfString; |
| 53 } else if (is_sequential_ascii_) { | 60 } else if (seq_ascii) { |
| 54 position_++; | |
| 55 c0_ = seq_source_->SeqAsciiStringGet(position_); | 61 c0_ = seq_source_->SeqAsciiStringGet(position_); |
| 56 } else { | 62 } else { |
| 57 position_++; | |
| 58 c0_ = source_->Get(position_); | 63 c0_ = source_->Get(position_); |
| 59 } | 64 } |
| 60 } | 65 } |
| 61 | 66 |
| 62 inline Isolate* isolate() { return isolate_; } | 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
| 68 // section 15.12.1.1. The only allowed whitespace characters between tokens |
| 69 // are tab, carriage-return, newline and space. |
| 63 | 70 |
| 64 // Get the string for the current string token. | 71 inline void AdvanceSkipWhitespace() { |
| 65 Handle<String> GetString(bool hint_symbol); | 72 do { |
| 66 Handle<String> GetString(); | 73 Advance(); |
| 67 Handle<String> GetSymbol(); | 74 } while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' '); |
| 75 } |
| 68 | 76 |
| 69 // Scan a single JSON token. The JSON lexical grammar is specified in the | 77 inline void SkipWhitespace() { |
| 70 // ECMAScript 5 standard, section 15.12.1.1. | 78 while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') { |
| 71 // Recognizes all of the single-character tokens directly, or calls a function | 79 Advance(); |
| 72 // to scan a number, string or identifier literal. | 80 } |
| 73 // The only allowed whitespace characters between tokens are tab, | 81 } |
| 74 // carriage-return, newline and space. | 82 |
| 75 void ScanJson(); | 83 inline uc32 AdvanceGetChar() { |
| 84 Advance(); |
| 85 return c0_; |
| 86 } |
| 87 |
| 88 // Checks that the current character is c. |
| 89 // If so, then consume c and skip whitespace. |
| 90 inline bool MatchSkipWhiteSpace(uc32 c) { |
| 91 if (c0_ == c) { |
| 92 AdvanceSkipWhitespace(); |
| 93 return true; |
| 94 } |
| 95 return false; |
| 96 } |
| 76 | 97 |
| 77 // A JSON string (production JSONString) is subset of valid JavaScript string | 98 // A JSON string (production JSONString) is subset of valid JavaScript string |
| 78 // literals. The string must only be double-quoted (not single-quoted), and | 99 // literals. The string must only be double-quoted (not single-quoted), and |
| 79 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
| 80 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
| 81 Token::Value ScanJsonString(); | 102 Handle<String> ParseJsonString() { |
| 82 // Slow version for unicode support, uses the first ascii_count characters, | 103 return ScanJsonString<false>(); |
| 83 // as first part of a ConsString | 104 } |
| 84 Token::Value SlowScanJsonString(); | 105 Handle<String> ParseJsonSymbol() { |
| 106 return ScanJsonString<true>(); |
| 107 } |
| 108 template <bool is_symbol> |
| 109 Handle<String> ScanJsonString(); |
| 110 // Creates a new string and copies prefix[start..end] into the beginning |
| 111 // of it. Then scans the rest of the string, adding characters after the |
| 112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char. |
| 113 template <typename StringType, typename SinkChar> |
| 114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); |
| 85 | 115 |
| 86 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| 87 // decimal number literals. | 117 // decimal number literals. |
| 88 // It includes an optional minus sign, must have at least one | 118 // It includes an optional minus sign, must have at least one |
| 89 // digit before and after a decimal point, may not have prefixed zeros (unless | 119 // digit before and after a decimal point, may not have prefixed zeros (unless |
| 90 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 120 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
| 91 // Hexadecimal and octal numbers are not allowed. | 121 // Hexadecimal and octal numbers are not allowed. |
| 92 Token::Value ScanJsonNumber(); | 122 Handle<Object> ParseJsonNumber(); |
| 93 | |
| 94 // Used to recognizes one of the literals "true", "false", or "null". These | |
| 95 // are the only valid JSON identifiers (productions JSONBooleanLiteral, | |
| 96 // JSONNullLiteral). | |
| 97 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); | |
| 98 | 123 |
| 99 // Parse a single JSON value from input (grammar production JSONValue). | 124 // Parse a single JSON value from input (grammar production JSONValue). |
| 100 // A JSON value is either a (double-quoted) string literal, a number literal, | 125 // A JSON value is either a (double-quoted) string literal, a number literal, |
| 101 // one of "true", "false", or "null", or an object or array literal. | 126 // one of "true", "false", or "null", or an object or array literal. |
| 102 Handle<Object> ParseJsonValue(); | 127 Handle<Object> ParseJsonValue(); |
| 103 | 128 |
| 104 // Parse a JSON object literal (grammar production JSONObject). | 129 // Parse a JSON object literal (grammar production JSONObject). |
| 105 // An object literal is a squiggly-braced and comma separated sequence | 130 // An object literal is a squiggly-braced and comma separated sequence |
| 106 // (possibly empty) of key/value pairs, where the key is a JSON string | 131 // (possibly empty) of key/value pairs, where the key is a JSON string |
| 107 // literal, the value is a JSON value, and the two are separated by a colon. | 132 // literal, the value is a JSON value, and the two are separated by a colon. |
| 108 // A JSON object doesn't allow numbers and identifiers as keys, like a | 133 // A JSON object doesn't allow numbers and identifiers as keys, like a |
| 109 // JavaScript object does. | 134 // JavaScript object does. |
| 110 Handle<Object> ParseJsonObject(); | 135 Handle<Object> ParseJsonObject(); |
| 111 | 136 |
| 112 // Parses a JSON array literal (grammar production JSONArray). An array | 137 // Parses a JSON array literal (grammar production JSONArray). An array |
| 113 // literal is a square-bracketed and comma separated sequence (possibly empty) | 138 // literal is a square-bracketed and comma separated sequence (possibly empty) |
| 114 // of JSON values. | 139 // of JSON values. |
| 115 // A JSON array doesn't allow leaving out values from the sequence, nor does | 140 // A JSON array doesn't allow leaving out values from the sequence, nor does |
| 116 // it allow a terminal comma, like a JavaScript array does. | 141 // it allow a terminal comma, like a JavaScript array does. |
| 117 Handle<Object> ParseJsonArray(); | 142 Handle<Object> ParseJsonArray(); |
| 118 | 143 |
| 119 | 144 |
| 120 // Mark that a parsing error has happened at the current token, and | 145 // Mark that a parsing error has happened at the current token, and |
| 121 // return a null handle. Primarily for readability. | 146 // return a null handle. Primarily for readability. |
| 122 Handle<Object> ReportUnexpectedToken() { return Handle<Object>::null(); } | 147 inline Handle<Object> ReportUnexpectedCharacter() { |
| 123 | 148 return Handle<Object>::null(); |
| 124 // Peek at the next token. | 149 } |
| 125 Token::Value Peek() { return next_.token; } | 150 |
| 126 // Scan the next token and return the token scanned on the last call. | 151 inline Isolate* isolate() { return isolate_; } |
| 127 Token::Value Next(); | 152 |
| 128 | 153 static const int kInitialSpecialStringLength = 1024; |
| 129 struct TokenInfo { | |
| 130 TokenInfo() : token(Token::ILLEGAL), | |
| 131 beg_pos(0), | |
| 132 end_pos(0) { } | |
| 133 Token::Value token; | |
| 134 int beg_pos; | |
| 135 int end_pos; | |
| 136 }; | |
| 137 | |
| 138 static const int kInitialSpecialStringSize = 100; | |
| 139 | 154 |
| 140 | 155 |
| 141 private: | 156 private: |
| 142 Handle<String> source_; | 157 Handle<String> source_; |
| 143 int source_length_; | 158 int source_length_; |
| 144 Handle<SeqAsciiString> seq_source_; | 159 Handle<SeqAsciiString> seq_source_; |
| 145 | 160 |
| 146 bool is_sequential_ascii_; | |
| 147 // Current and next token | |
| 148 TokenInfo current_; | |
| 149 TokenInfo next_; | |
| 150 Isolate* isolate_; | 161 Isolate* isolate_; |
| 151 uc32 c0_; | 162 uc32 c0_; |
| 152 int position_; | 163 int position_; |
| 153 | |
| 154 | |
| 155 Handle<String> string_val_; | |
| 156 double number_; | |
| 157 }; | 164 }; |
| 158 | 165 |
| 166 template <bool seq_ascii> |
| 167 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { |
| 168 isolate_ = source->map()->GetHeap()->isolate(); |
| 169 source_ = Handle<String>(source->TryFlattenGetString()); |
| 170 source_length_ = source_->length(); |
| 171 |
| 172 // Optimized fast case where we only have ASCII characters. |
| 173 if (seq_ascii) { |
| 174 seq_source_ = Handle<SeqAsciiString>::cast(source_); |
| 175 } |
| 176 |
| 177 // Set initial position right before the string. |
| 178 position_ = -1; |
| 179 // Advance to the first character (possibly EOS). |
| 180 AdvanceSkipWhitespace(); |
| 181 Handle<Object> result = ParseJsonValue(); |
| 182 if (result.is_null() || c0_ != kEndOfString) { |
| 183 // Parse failed. Current character is the unexpected token. |
| 184 |
| 185 const char* message; |
| 186 Factory* factory = isolate()->factory(); |
| 187 Handle<JSArray> array; |
| 188 |
| 189 switch (c0_) { |
| 190 case kEndOfString: |
| 191 message = "unexpected_eos"; |
| 192 array = factory->NewJSArray(0); |
| 193 break; |
| 194 case '-': |
| 195 case '0': |
| 196 case '1': |
| 197 case '2': |
| 198 case '3': |
| 199 case '4': |
| 200 case '5': |
| 201 case '6': |
| 202 case '7': |
| 203 case '8': |
| 204 case '9': |
| 205 message = "unexpected_token_number"; |
| 206 array = factory->NewJSArray(0); |
| 207 break; |
| 208 case '"': |
| 209 message = "unexpected_token_string"; |
| 210 array = factory->NewJSArray(0); |
| 211 break; |
| 212 default: |
| 213 message = "unexpected_token"; |
| 214 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_); |
| 215 Handle<FixedArray> element = factory->NewFixedArray(1); |
| 216 element->set(0, *name); |
| 217 array = factory->NewJSArrayWithElements(element); |
| 218 break; |
| 219 } |
| 220 |
| 221 MessageLocation location(factory->NewScript(source), |
| 222 position_, |
| 223 position_ + 1); |
| 224 Handle<Object> result = factory->NewSyntaxError(message, array); |
| 225 isolate()->Throw(*result, &location); |
| 226 return Handle<Object>::null(); |
| 227 } |
| 228 return result; |
| 229 } |
| 230 |
| 231 |
| 232 // Parse any JSON value. |
| 233 template <bool seq_ascii> |
| 234 Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() { |
| 235 switch (c0_) { |
| 236 case '"': |
| 237 return ParseJsonString(); |
| 238 case '-': |
| 239 case '0': |
| 240 case '1': |
| 241 case '2': |
| 242 case '3': |
| 243 case '4': |
| 244 case '5': |
| 245 case '6': |
| 246 case '7': |
| 247 case '8': |
| 248 case '9': |
| 249 return ParseJsonNumber(); |
| 250 case 'f': |
| 251 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && |
| 252 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { |
| 253 AdvanceSkipWhitespace(); |
| 254 return isolate()->factory()->false_value(); |
| 255 } else { |
| 256 return ReportUnexpectedCharacter(); |
| 257 } |
| 258 case 't': |
| 259 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && |
| 260 AdvanceGetChar() == 'e') { |
| 261 AdvanceSkipWhitespace(); |
| 262 return isolate()->factory()->true_value(); |
| 263 } else { |
| 264 return ReportUnexpectedCharacter(); |
| 265 } |
| 266 case 'n': |
| 267 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && |
| 268 AdvanceGetChar() == 'l') { |
| 269 AdvanceSkipWhitespace(); |
| 270 return isolate()->factory()->null_value(); |
| 271 } else { |
| 272 return ReportUnexpectedCharacter(); |
| 273 } |
| 274 case '{': |
| 275 return ParseJsonObject(); |
| 276 case '[': |
| 277 return ParseJsonArray(); |
| 278 default: |
| 279 return ReportUnexpectedCharacter(); |
| 280 } |
| 281 } |
| 282 |
| 283 |
| 284 // Parse a JSON object. Position must be right at '{'. |
| 285 template <bool seq_ascii> |
| 286 Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() { |
| 287 Handle<JSFunction> object_constructor( |
| 288 isolate()->global_context()->object_function()); |
| 289 Handle<JSObject> json_object = |
| 290 isolate()->factory()->NewJSObject(object_constructor); |
| 291 ASSERT_EQ(c0_, '{'); |
| 292 |
| 293 AdvanceSkipWhitespace(); |
| 294 if (c0_ != '}') { |
| 295 do { |
| 296 if (c0_ != '"') return ReportUnexpectedCharacter(); |
| 297 Handle<String> key = ParseJsonSymbol(); |
| 298 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); |
| 299 AdvanceSkipWhitespace(); |
| 300 Handle<Object> value = ParseJsonValue(); |
| 301 if (value.is_null()) return ReportUnexpectedCharacter(); |
| 302 |
| 303 uint32_t index; |
| 304 if (key->AsArrayIndex(&index)) { |
| 305 SetOwnElement(json_object, index, value, kNonStrictMode); |
| 306 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { |
| 307 SetPrototype(json_object, value); |
| 308 } else { |
| 309 SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); |
| 310 } |
| 311 } while (MatchSkipWhiteSpace(',')); |
| 312 if (c0_ != '}') { |
| 313 return ReportUnexpectedCharacter(); |
| 314 } |
| 315 } |
| 316 AdvanceSkipWhitespace(); |
| 317 return json_object; |
| 318 } |
| 319 |
| 320 // Parse a JSON array. Position must be right at '['. |
| 321 template <bool seq_ascii> |
| 322 Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() { |
| 323 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT); |
| 324 ZoneList<Handle<Object> > elements(4); |
| 325 ASSERT_EQ(c0_, '['); |
| 326 |
| 327 AdvanceSkipWhitespace(); |
| 328 if (c0_ != ']') { |
| 329 do { |
| 330 Handle<Object> element = ParseJsonValue(); |
| 331 if (element.is_null()) return ReportUnexpectedCharacter(); |
| 332 elements.Add(element); |
| 333 } while (MatchSkipWhiteSpace(',')); |
| 334 if (c0_ != ']') { |
| 335 return ReportUnexpectedCharacter(); |
| 336 } |
| 337 } |
| 338 AdvanceSkipWhitespace(); |
| 339 // Allocate a fixed array with all the elements. |
| 340 Handle<FixedArray> fast_elements = |
| 341 isolate()->factory()->NewFixedArray(elements.length()); |
| 342 for (int i = 0, n = elements.length(); i < n; i++) { |
| 343 fast_elements->set(i, *elements[i]); |
| 344 } |
| 345 return isolate()->factory()->NewJSArrayWithElements(fast_elements); |
| 346 } |
| 347 |
| 348 |
| 349 template <bool seq_ascii> |
| 350 Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() { |
| 351 bool negative = false; |
| 352 int beg_pos = position_; |
| 353 if (c0_ == '-') { |
| 354 Advance(); |
| 355 negative = true; |
| 356 } |
| 357 if (c0_ == '0') { |
| 358 Advance(); |
| 359 // Prefix zero is only allowed if it's the only digit before |
| 360 // a decimal point or exponent. |
| 361 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter(); |
| 362 } else { |
| 363 int i = 0; |
| 364 int digits = 0; |
| 365 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); |
| 366 do { |
| 367 i = i * 10 + c0_ - '0'; |
| 368 digits++; |
| 369 Advance(); |
| 370 } while (c0_ >= '0' && c0_ <= '9'); |
| 371 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { |
| 372 SkipWhitespace(); |
| 373 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); |
| 374 } |
| 375 } |
| 376 if (c0_ == '.') { |
| 377 Advance(); |
| 378 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); |
| 379 do { |
| 380 Advance(); |
| 381 } while (c0_ >= '0' && c0_ <= '9'); |
| 382 } |
| 383 if (AsciiAlphaToLower(c0_) == 'e') { |
| 384 Advance(); |
| 385 if (c0_ == '-' || c0_ == '+') Advance(); |
| 386 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); |
| 387 do { |
| 388 Advance(); |
| 389 } while (c0_ >= '0' && c0_ <= '9'); |
| 390 } |
| 391 int length = position_ - beg_pos; |
| 392 double number; |
| 393 if (seq_ascii) { |
| 394 Vector<const char> chars(seq_source_->GetChars() + beg_pos, length); |
| 395 number = StringToDouble(isolate()->unicode_cache(), |
| 396 chars, |
| 397 NO_FLAGS, // Hex, octal or trailing junk. |
| 398 OS::nan_value()); |
| 399 } else { |
| 400 Vector<char> buffer = Vector<char>::New(length); |
| 401 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); |
| 402 Vector<const char> result = |
| 403 Vector<const char>(reinterpret_cast<const char*>(buffer.start()), |
| 404 length); |
| 405 number = StringToDouble(isolate()->unicode_cache(), |
| 406 result, |
| 407 NO_FLAGS, // Hex, octal or trailing junk. |
| 408 0.0); |
| 409 buffer.Dispose(); |
| 410 } |
| 411 SkipWhitespace(); |
| 412 return isolate()->factory()->NewNumber(number); |
| 413 } |
| 414 |
| 415 |
| 416 template <typename StringType> |
| 417 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); |
| 418 |
| 419 template <> |
| 420 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { |
| 421 seq_str->SeqTwoByteStringSet(i, c); |
| 422 } |
| 423 |
| 424 template <> |
| 425 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { |
| 426 seq_str->SeqAsciiStringSet(i, c); |
| 427 } |
| 428 |
| 429 template <typename StringType> |
| 430 inline Handle<StringType> NewRawString(Factory* factory, int length); |
| 431 |
| 432 template <> |
| 433 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { |
| 434 return factory->NewRawTwoByteString(length, NOT_TENURED); |
| 435 } |
| 436 |
| 437 template <> |
| 438 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { |
| 439 return factory->NewRawAsciiString(length, NOT_TENURED); |
| 440 } |
| 441 |
| 442 |
| 443 // Scans the rest of a JSON string starting from position_ and writes |
| 444 // prefix[start..end] along with the scanned characters into a |
| 445 // sequential string of type StringType. |
| 446 template <bool seq_ascii> |
| 447 template <typename StringType, typename SinkChar> |
| 448 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( |
| 449 Handle<String> prefix, int start, int end) { |
| 450 int count = end - start; |
| 451 int max_length = count + source_length_ - position_; |
| 452 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); |
| 453 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), |
| 454 length); |
| 455 // Copy prefix into seq_str. |
| 456 SinkChar* dest = seq_str->GetChars(); |
| 457 String::WriteToFlat(*prefix, dest, start, end); |
| 458 |
| 459 while (c0_ != '"') { |
| 460 if (count >= length) { |
| 461 // We need to create a longer sequential string for the result. |
| 462 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); |
| 463 } |
| 464 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 465 if (c0_ < 0x20) return Handle<String>::null(); |
| 466 if (c0_ != '\\') { |
| 467 // If the sink can contain UC16 characters, or source_ contains only |
| 468 // ASCII characters, there's no need to test whether we can store the |
| 469 // character. Otherwise check whether the UC16 source character can fit |
| 470 // in the ASCII sink. |
| 471 if (sizeof(SinkChar) == kUC16Size || |
| 472 seq_ascii || |
| 473 c0_ <= kMaxAsciiCharCode) { |
| 474 SeqStringSet(seq_str, count++, c0_); |
| 475 Advance(); |
| 476 } else { |
| 477 // StringType is SeqAsciiString and we just read a non-ASCII char. |
| 478 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count); |
| 479 } |
| 480 } else { |
| 481 Advance(); // Advance past the \. |
| 482 switch (c0_) { |
| 483 case '"': |
| 484 case '\\': |
| 485 case '/': |
| 486 SeqStringSet(seq_str, count++, c0_); |
| 487 break; |
| 488 case 'b': |
| 489 SeqStringSet(seq_str, count++, '\x08'); |
| 490 break; |
| 491 case 'f': |
| 492 SeqStringSet(seq_str, count++, '\x0c'); |
| 493 break; |
| 494 case 'n': |
| 495 SeqStringSet(seq_str, count++, '\x0a'); |
| 496 break; |
| 497 case 'r': |
| 498 SeqStringSet(seq_str, count++, '\x0d'); |
| 499 break; |
| 500 case 't': |
| 501 SeqStringSet(seq_str, count++, '\x09'); |
| 502 break; |
| 503 case 'u': { |
| 504 uc32 value = 0; |
| 505 for (int i = 0; i < 4; i++) { |
| 506 Advance(); |
| 507 int digit = HexValue(c0_); |
| 508 if (digit < 0) { |
| 509 return Handle<String>::null(); |
| 510 } |
| 511 value = value * 16 + digit; |
| 512 } |
| 513 if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) { |
| 514 SeqStringSet(seq_str, count++, value); |
| 515 break; |
| 516 } else { |
| 517 // StringType is SeqAsciiString and we just read a non-ASCII char. |
| 518 position_ -= 6; // Rewind position_ to \ in \uxxxx. |
| 519 Advance(); |
| 520 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, |
| 521 0, |
| 522 count); |
| 523 } |
| 524 } |
| 525 default: |
| 526 return Handle<String>::null(); |
| 527 } |
| 528 Advance(); |
| 529 } |
| 530 } |
| 531 // Shrink seq_str's length to count. |
| 532 if (isolate()->heap()->InNewSpace(*seq_str)) { |
| 533 isolate()->heap()->new_space()-> |
| 534 template ShrinkStringAtAllocationBoundary<StringType>( |
| 535 *seq_str, count); |
| 536 } else { |
| 537 int string_size = StringType::SizeFor(count); |
| 538 int allocated_string_size = StringType::SizeFor(length); |
| 539 int delta = allocated_string_size - string_size; |
| 540 Address start_filler_object = seq_str->address() + string_size; |
| 541 seq_str->set_length(count); |
| 542 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
| 543 } |
| 544 ASSERT_EQ('"', c0_); |
| 545 // Advance past the last '"'. |
| 546 AdvanceSkipWhitespace(); |
| 547 return seq_str; |
| 548 } |
| 549 |
| 550 |
| 551 template <bool seq_ascii> |
| 552 template <bool is_symbol> |
| 553 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
| 554 ASSERT_EQ('"', c0_); |
| 555 Advance(); |
| 556 if (c0_ == '"') { |
| 557 AdvanceSkipWhitespace(); |
| 558 return Handle<String>(isolate()->heap()->empty_string()); |
| 559 } |
| 560 int beg_pos = position_; |
| 561 // Fast case for ASCII only without escape characters. |
| 562 do { |
| 563 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 564 if (c0_ < 0x20) return Handle<String>::null(); |
| 565 if (c0_ != '\\') { |
| 566 if (seq_ascii || c0_ <= kMaxAsciiCharCode) { |
| 567 Advance(); |
| 568 } else { |
| 569 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, |
| 570 beg_pos, |
| 571 position_); |
| 572 } |
| 573 } else { |
| 574 return SlowScanJsonString<SeqAsciiString, char>(source_, |
| 575 beg_pos, |
| 576 position_); |
| 577 } |
| 578 } while (c0_ != '"'); |
| 579 int length = position_ - beg_pos; |
| 580 Handle<String> result; |
| 581 if (seq_ascii && is_symbol) { |
| 582 result = isolate()->factory()->LookupAsciiSymbol(seq_source_, |
| 583 beg_pos, |
| 584 length); |
| 585 } else { |
| 586 result = isolate()->factory()->NewRawAsciiString(length); |
| 587 char* dest = SeqAsciiString::cast(*result)->GetChars(); |
| 588 String::WriteToFlat(*source_, dest, beg_pos, position_); |
| 589 } |
| 590 ASSERT_EQ('"', c0_); |
| 591 // Advance past the last '"'. |
| 592 AdvanceSkipWhitespace(); |
| 593 return result; |
| 594 } |
| 595 |
| 159 } } // namespace v8::internal | 596 } } // namespace v8::internal |
| 160 | 597 |
| 161 #endif // V8_JSON_PARSER_H_ | 598 #endif // V8_JSON_PARSER_H_ |
| OLD | NEW |