| OLD | NEW |
| (Empty) |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
#include "platform/JSONParser.h"

#include "platform/Decimal.h"
#include "platform/JSONValues.h"
#include "wtf/text/StringBuilder.h"
#include "wtf/text/StringToNumber.h"
#include <limits>
| 11 | |
| 12 namespace blink { | |
| 13 | |
| 14 namespace { | |
| 15 | |
// Deepest array/object nesting level buildValue() will descend to; a value
// nested deeper than this makes the whole parse fail.
const int stackLimit = 1000;
| 17 | |
// Lexical categories reported by parseToken().
enum Token {
    ObjectBegin, // '{'
    ObjectEnd, // '}'
    ArrayBegin, // '['
    ArrayEnd, // ']'
    StringLiteral, // Double-quoted string, including both quotes.
    Number, // Numeric literal.
    BoolTrue, // The keyword "true".
    BoolFalse, // The keyword "false".
    NullToken, // The keyword "null".
    ListSeparator, // ','
    ObjectPairSeparator, // ':'
    InvalidToken, // End of input or anything that is not one of the above.
};
| 32 | |
// Spellings of the three JSON keyword literals; parseToken() hands these to
// parseConstToken() for matching.
const char* const nullString = "null";
const char* const trueString = "true";
const char* const falseString = "false";
| 36 | |
// Matches the NUL-terminated ASCII keyword |token| against the beginning of
// [start, end).
//
// On a complete match, stores the position just past the keyword in
// |*tokenEnd| and returns true. Otherwise returns false and leaves
// |*tokenEnd| untouched.
template<typename CharType>
bool parseConstToken(const CharType* start, const CharType* end, const CharType** tokenEnd, const char* token)
{
    // Advance only after a character has matched. Post-incrementing both
    // pointers inside the comparison would also step over a mismatching
    // final character, leaving |token| on its terminator and wrongly
    // accepting input such as "nulx" for "null".
    while (start < end && *token != '\0' && *start == *token) {
        ++start;
        ++token;
    }
    // Anything left in |token| means the input ended early or differed.
    if (*token != '\0')
        return false;
    *tokenEnd = start;
    return true;
}
| 46 | |
// Consumes a run of one or more ASCII decimal digits at the beginning of
// [start, end).
//
// Returns false if there is no digit at |start|, or if the run is longer than
// one digit, begins with '0', and |canHaveLeadingZeros| is false. On success
// stores the position just past the last digit in |*tokenEnd|.
template<typename CharType>
bool readInt(const CharType* start, const CharType* end, const CharType** tokenEnd, bool canHaveLeadingZeros)
{
    const CharType* cursor = start;
    while (cursor < end && *cursor >= '0' && *cursor <= '9')
        ++cursor;

    // At least one digit is required.
    if (cursor == start)
        return false;

    // "0" alone is always fine; "01", "007", ... only when explicitly allowed.
    const bool multiDigitWithLeadingZero = cursor - start > 1 && *start == '0';
    if (multiDigitWithLeadingZero && !canHaveLeadingZeros)
        return false;

    *tokenEnd = cursor;
    return true;
}
| 65 | |
// Recognizes a numeric literal at the beginning of [start, end) using the
// RFC 4627 shape: [minus] int [frac] [exp].
//
// Only the extent of the literal is determined here; the numeric value is
// converted (and its magnitude checked) by the caller. On success stores the
// position just past the literal in |*tokenEnd| and returns true.
template<typename CharType>
bool parseNumberToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    if (start == end)
        return false;

    const CharType* pos = start;

    // Optional sign.
    if (*pos == '-')
        ++pos;

    // Mandatory integer part; leading zeros are not allowed here.
    if (!readInt(pos, end, &pos, false))
        return false;

    // Optional fraction part: '.' followed by at least one digit.
    if (pos < end && *pos == '.') {
        ++pos;
        if (!readInt(pos, end, &pos, true))
            return false;
    }

    // Optional exponent part: 'e'/'E', optional sign, at least one digit.
    // readInt() fails on an empty range, which covers input that stops right
    // after the 'e' or after the sign.
    if (pos < end && (*pos == 'e' || *pos == 'E')) {
        ++pos;
        if (pos < end && (*pos == '-' || *pos == '+'))
            ++pos;
        if (!readInt(pos, end, &pos, true))
            return false;
    }

    *tokenEnd = pos;
    return true;
}
| 115 | |
// Requires exactly |digits| hexadecimal digits (0-9, a-f, A-F) at the
// beginning of [start, end).
//
// Returns false if fewer than |digits| characters remain or any of them is
// not a hex digit. On success stores the position just past the digits in
// |*tokenEnd|.
template<typename CharType>
bool readHexDigits(const CharType* start, const CharType* end, const CharType** tokenEnd, int digits)
{
    if (end - start < digits)
        return false;

    const CharType* cursor = start;
    for (int remaining = digits; remaining > 0; --remaining, ++cursor) {
        const CharType c = *cursor;
        const bool isDecimal = c >= '0' && c <= '9';
        const bool isLowerHex = c >= 'a' && c <= 'f';
        const bool isUpperHex = c >= 'A' && c <= 'F';
        if (!isDecimal && !isLowerHex && !isUpperHex)
            return false;
    }

    *tokenEnd = cursor;
    return true;
}
| 129 | |
// Scans the body of a string literal. |start| must point just past the
// opening double quote.
//
// Accepted escapes are \x followed by two hex digits, \u followed by four hex
// digits, and \\ \/ \b \f \n \r \t \v \". Any other escape, or input that
// ends before the closing quote, makes the scan fail.
//
// On success stores the position just past the closing quote in |*tokenEnd|
// and returns true.
template<typename CharType>
bool parseStringToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    while (start < end) {
        CharType c = *start++;
        if ('\\' == c) {
            // A backslash as the very last input character has nothing to
            // escape; fail here rather than reading past |end|.
            if (start == end)
                return false;
            c = *start++;
            // Make sure the escaped char is valid.
            switch (c) {
            case 'x':
                if (!readHexDigits(start, end, &start, 2))
                    return false;
                break;
            case 'u':
                if (!readHexDigits(start, end, &start, 4))
                    return false;
                break;
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case '"':
                break;
            default:
                return false;
            }
        } else if ('"' == c) {
            // Unescaped quote: end of the literal.
            *tokenEnd = start;
            return true;
        }
    }
    // Ran out of input without seeing the closing quote.
    return false;
}
| 167 | |
// Skips one comment at the beginning of [start, end).
//
// Two forms are recognized:
//   - "//" up to and including the first '\n' or '\r', or up to end of input;
//   - "/*" up to and including the matching "*/", which must be present.
//
// On success stores the position just past the comment in |*commentEnd| and
// returns true. Returns false if the input does not begin with a comment or
// a block comment is left unterminated.
template<typename CharType>
bool skipComment(const CharType* start, const CharType* end, const CharType** commentEnd)
{
    // Both comment forms need a two-character opener starting with '/'.
    if (end - start < 2 || start[0] != '/')
        return false;

    const CharType* cursor = start + 2;
    switch (start[1]) {
    case '/':
        // Single line comment, read to newline.
        while (cursor < end) {
            const CharType c = *cursor++;
            if (c == '\n' || c == '\r') {
                *commentEnd = cursor;
                return true;
            }
        }
        // Comment reaches end-of-input, which is fine.
        *commentEnd = end;
        return true;
    case '*':
        // Block comment, read until end marker. |sawStar| starts out false so
        // the '*' of the opener cannot double as the '*' of the closer
        // ("/*/" is not a complete comment).
        for (bool sawStar = false; cursor < end; ++cursor) {
            if (sawStar && *cursor == '/') {
                *commentEnd = cursor + 1;
                return true;
            }
            sawStar = *cursor == '*';
        }
        // Block comment must close before end-of-input.
        return false;
    default:
        return false;
    }
}
| 206 | |
// Advances over any mix of whitespace and comments at the beginning of
// [start, end) and stores the first position that is neither in
// |*whitespaceEnd|. A '/' that does not begin a well-formed comment stops the
// scan and is left in place for the caller.
template<typename CharType>
void skipWhitespaceAndComments(const CharType* start, const CharType* end, const CharType** whitespaceEnd)
{
    const CharType* cursor = start;
    while (cursor < end) {
        if (isSpaceOrNewline(*cursor)) {
            ++cursor;
            continue;
        }
        const CharType* afterComment;
        if (*cursor != '/' || !skipComment(cursor, end, &afterComment))
            break;
        cursor = afterComment;
    }
    *whitespaceEnd = cursor;
}
| 224 | |
| 225 template<typename CharType> | |
| 226 Token parseToken(const CharType* start, const CharType* end, const CharType** to
kenStart, const CharType** tokenEnd) | |
| 227 { | |
| 228 skipWhitespaceAndComments(start, end, tokenStart); | |
| 229 start = *tokenStart; | |
| 230 | |
| 231 if (start == end) | |
| 232 return InvalidToken; | |
| 233 | |
| 234 switch (*start) { | |
| 235 case 'n': | |
| 236 if (parseConstToken(start, end, tokenEnd, nullString)) | |
| 237 return NullToken; | |
| 238 break; | |
| 239 case 't': | |
| 240 if (parseConstToken(start, end, tokenEnd, trueString)) | |
| 241 return BoolTrue; | |
| 242 break; | |
| 243 case 'f': | |
| 244 if (parseConstToken(start, end, tokenEnd, falseString)) | |
| 245 return BoolFalse; | |
| 246 break; | |
| 247 case '[': | |
| 248 *tokenEnd = start + 1; | |
| 249 return ArrayBegin; | |
| 250 case ']': | |
| 251 *tokenEnd = start + 1; | |
| 252 return ArrayEnd; | |
| 253 case ',': | |
| 254 *tokenEnd = start + 1; | |
| 255 return ListSeparator; | |
| 256 case '{': | |
| 257 *tokenEnd = start + 1; | |
| 258 return ObjectBegin; | |
| 259 case '}': | |
| 260 *tokenEnd = start + 1; | |
| 261 return ObjectEnd; | |
| 262 case ':': | |
| 263 *tokenEnd = start + 1; | |
| 264 return ObjectPairSeparator; | |
| 265 case '0': | |
| 266 case '1': | |
| 267 case '2': | |
| 268 case '3': | |
| 269 case '4': | |
| 270 case '5': | |
| 271 case '6': | |
| 272 case '7': | |
| 273 case '8': | |
| 274 case '9': | |
| 275 case '-': | |
| 276 if (parseNumberToken(start, end, tokenEnd)) | |
| 277 return Number; | |
| 278 break; | |
| 279 case '"': | |
| 280 if (parseStringToken(start + 1, end, tokenEnd)) | |
| 281 return StringLiteral; | |
| 282 break; | |
| 283 } | |
| 284 return InvalidToken; | |
| 285 } | |
| 286 | |
| 287 template<typename CharType> | |
| 288 inline int hexToInt(CharType c) | |
| 289 { | |
| 290 if ('0' <= c && c <= '9') | |
| 291 return c - '0'; | |
| 292 if ('A' <= c && c <= 'F') | |
| 293 return c - 'A' + 10; | |
| 294 if ('a' <= c && c <= 'f') | |
| 295 return c - 'a' + 10; | |
| 296 NOTREACHED(); | |
| 297 return 0; | |
| 298 } | |
| 299 | |
| 300 template<typename CharType> | |
| 301 bool decodeString(const CharType* start, const CharType* end, StringBuilder* out
put) | |
| 302 { | |
| 303 while (start < end) { | |
| 304 UChar c = *start++; | |
| 305 if ('\\' != c) { | |
| 306 output->append(c); | |
| 307 continue; | |
| 308 } | |
| 309 c = *start++; | |
| 310 | |
| 311 if (c == 'x') { | |
| 312 // \x is not supported. | |
| 313 return false; | |
| 314 } | |
| 315 | |
| 316 switch (c) { | |
| 317 case '"': | |
| 318 case '/': | |
| 319 case '\\': | |
| 320 break; | |
| 321 case 'b': | |
| 322 c = '\b'; | |
| 323 break; | |
| 324 case 'f': | |
| 325 c = '\f'; | |
| 326 break; | |
| 327 case 'n': | |
| 328 c = '\n'; | |
| 329 break; | |
| 330 case 'r': | |
| 331 c = '\r'; | |
| 332 break; | |
| 333 case 't': | |
| 334 c = '\t'; | |
| 335 break; | |
| 336 case 'v': | |
| 337 c = '\v'; | |
| 338 break; | |
| 339 case 'u': | |
| 340 c = (hexToInt(*start) << 12) + | |
| 341 (hexToInt(*(start + 1)) << 8) + | |
| 342 (hexToInt(*(start + 2)) << 4) + | |
| 343 hexToInt(*(start + 3)); | |
| 344 start += 4; | |
| 345 break; | |
| 346 default: | |
| 347 return false; | |
| 348 } | |
| 349 output->append(c); | |
| 350 } | |
| 351 return true; | |
| 352 } | |
| 353 | |
| 354 template<typename CharType> | |
| 355 bool decodeString(const CharType* start, const CharType* end, String* output) | |
| 356 { | |
| 357 if (start == end) { | |
| 358 *output = ""; | |
| 359 return true; | |
| 360 } | |
| 361 if (start > end) | |
| 362 return false; | |
| 363 StringBuilder buffer; | |
| 364 buffer.reserveCapacity(end - start); | |
| 365 if (!decodeString(start, end, &buffer)) | |
| 366 return false; | |
| 367 *output = buffer.toString(); | |
| 368 // Validate constructed utf16 string. | |
| 369 if (output->utf8(StrictUTF8Conversion).isNull()) | |
| 370 return false; | |
| 371 return true; | |
| 372 } | |
| 373 | |
| 374 template<typename CharType> | |
| 375 std::unique_ptr<JSONValue> buildValue(const CharType* start, const CharType* end
, const CharType** valueTokenEnd, int depth) | |
| 376 { | |
| 377 if (depth > stackLimit) | |
| 378 return nullptr; | |
| 379 | |
| 380 std::unique_ptr<JSONValue> result; | |
| 381 const CharType* tokenStart; | |
| 382 const CharType* tokenEnd; | |
| 383 Token token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 384 switch (token) { | |
| 385 case InvalidToken: | |
| 386 return nullptr; | |
| 387 case NullToken: | |
| 388 result = JSONValue::null(); | |
| 389 break; | |
| 390 case BoolTrue: | |
| 391 result = JSONBasicValue::create(true); | |
| 392 break; | |
| 393 case BoolFalse: | |
| 394 result = JSONBasicValue::create(false); | |
| 395 break; | |
| 396 case Number: { | |
| 397 bool ok; | |
| 398 double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok
); | |
| 399 if (Decimal::fromDouble(value).isInfinity()) | |
| 400 ok = false; | |
| 401 if (!ok) | |
| 402 return nullptr; | |
| 403 int number = static_cast<int>(value); | |
| 404 if (number == value) | |
| 405 result = JSONBasicValue::create(number); | |
| 406 else | |
| 407 result = JSONBasicValue::create(value); | |
| 408 break; | |
| 409 } | |
| 410 case StringLiteral: { | |
| 411 String value; | |
| 412 bool ok = decodeString(tokenStart + 1, tokenEnd - 1, &value); | |
| 413 if (!ok) | |
| 414 return nullptr; | |
| 415 result = JSONString::create(value); | |
| 416 break; | |
| 417 } | |
| 418 case ArrayBegin: { | |
| 419 std::unique_ptr<JSONArray> array = JSONArray::create(); | |
| 420 start = tokenEnd; | |
| 421 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 422 while (token != ArrayEnd) { | |
| 423 std::unique_ptr<JSONValue> arrayNode = buildValue(start, end, &token
End, depth + 1); | |
| 424 if (!arrayNode) | |
| 425 return nullptr; | |
| 426 array->pushValue(std::move(arrayNode)); | |
| 427 | |
| 428 // After a list value, we expect a comma or the end of the list. | |
| 429 start = tokenEnd; | |
| 430 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 431 if (token == ListSeparator) { | |
| 432 start = tokenEnd; | |
| 433 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 434 if (token == ArrayEnd) | |
| 435 return nullptr; | |
| 436 } else if (token != ArrayEnd) { | |
| 437 // Unexpected value after list value. Bail out. | |
| 438 return nullptr; | |
| 439 } | |
| 440 } | |
| 441 if (token != ArrayEnd) | |
| 442 return nullptr; | |
| 443 result = std::move(array); | |
| 444 break; | |
| 445 } | |
| 446 case ObjectBegin: { | |
| 447 std::unique_ptr<JSONObject> object = JSONObject::create(); | |
| 448 start = tokenEnd; | |
| 449 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 450 while (token != ObjectEnd) { | |
| 451 if (token != StringLiteral) | |
| 452 return nullptr; | |
| 453 String key; | |
| 454 if (!decodeString(tokenStart + 1, tokenEnd - 1, &key)) | |
| 455 return nullptr; | |
| 456 start = tokenEnd; | |
| 457 | |
| 458 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 459 if (token != ObjectPairSeparator) | |
| 460 return nullptr; | |
| 461 start = tokenEnd; | |
| 462 | |
| 463 std::unique_ptr<JSONValue> value = buildValue(start, end, &tokenEnd,
depth + 1); | |
| 464 if (!value) | |
| 465 return nullptr; | |
| 466 object->setValue(key, std::move(value)); | |
| 467 start = tokenEnd; | |
| 468 | |
| 469 // After a key/value pair, we expect a comma or the end of the | |
| 470 // object. | |
| 471 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 472 if (token == ListSeparator) { | |
| 473 start = tokenEnd; | |
| 474 token = parseToken(start, end, &tokenStart, &tokenEnd); | |
| 475 if (token == ObjectEnd) | |
| 476 return nullptr; | |
| 477 } else if (token != ObjectEnd) { | |
| 478 // Unexpected value after last object value. Bail out. | |
| 479 return nullptr; | |
| 480 } | |
| 481 } | |
| 482 if (token != ObjectEnd) | |
| 483 return nullptr; | |
| 484 result = std::move(object); | |
| 485 break; | |
| 486 } | |
| 487 | |
| 488 default: | |
| 489 // We got a token that's not a value. | |
| 490 return nullptr; | |
| 491 } | |
| 492 | |
| 493 skipWhitespaceAndComments(tokenEnd, end, valueTokenEnd); | |
| 494 return result; | |
| 495 } | |
| 496 | |
| 497 template<typename CharType> | |
| 498 std::unique_ptr<JSONValue> parseJSONInternal(const CharType* start, unsigned len
gth) | |
| 499 { | |
| 500 const CharType* end = start + length; | |
| 501 const CharType *tokenEnd; | |
| 502 std::unique_ptr<JSONValue> value = buildValue(start, end, &tokenEnd, 0); | |
| 503 if (!value || tokenEnd != end) | |
| 504 return nullptr; | |
| 505 return value; | |
| 506 } | |
| 507 | |
| 508 } // anonymous namespace | |
| 509 | |
| 510 std::unique_ptr<JSONValue> parseJSON(const String& json) | |
| 511 { | |
| 512 if (json.isEmpty()) | |
| 513 return nullptr; | |
| 514 if (json.is8Bit()) | |
| 515 return parseJSONInternal(json.characters8(), json.length()); | |
| 516 return parseJSONInternal(json.characters16(), json.length()); | |
| 517 } | |
| 518 | |
| 519 } // namespace blink | |
| OLD | NEW |