OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2010 Google Inc. All rights reserved. |
| 3 * |
| 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are |
| 6 * met: |
| 7 * |
| 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above |
| 11 * copyright notice, this list of conditions and the following disclaimer |
| 12 * in the documentation and/or other materials provided with the |
| 13 * distribution. |
| 14 * * Neither the name of Google Inc. nor the names of its |
| 15 * contributors may be used to endorse or promote products derived from |
| 16 * this software without specific prior written permission. |
| 17 * |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 */ |
| 30 |
| 31 #include "config.h" |
| 32 #include "core/inspector/JSONParser.h" |
| 33 |
| 34 #include "platform/JSONValues.h" |
| 35 #include "wtf/text/StringBuilder.h" |
| 36 |
| 37 namespace blink { |
| 38 |
| 39 namespace { |
| 40 |
// Upper bound on the recursion depth of buildValue(). Input whose arrays and
// objects nest more deeply than this is rejected rather than parsed.
const int stackLimit = 1000;
| 42 |
// Lexical categories produced by parseToken().
enum Token {
    ObjectBegin,         // '{'
    ObjectEnd,           // '}'
    ArrayBegin,          // '['
    ArrayEnd,            // ']'
    StringLiteral,       // a double-quoted string, quotes included
    Number,              // [minus] int [frac] [exp]
    BoolTrue,            // the literal "true"
    BoolFalse,           // the literal "false"
    NullToken,           // the literal "null"
    ListSeparator,       // ','
    ObjectPairSeparator, // ':'
    InvalidToken,        // end of input or anything unrecognized
};
| 57 |
// Spellings of the three keyword literals; parseToken() hands them to
// parseConstToken() for matching.
const char* const nullString = "null";
const char* const trueString = "true";
const char* const falseString = "false";
| 61 |
// Checks whether the NUL-terminated literal |token| appears at the very
// beginning of [start, end).
//
// On success stores the position just past the matched literal in |*tokenEnd|
// and returns true. On failure returns false and leaves |*tokenEnd| untouched.
template<typename CharType>
bool parseConstToken(const CharType* start, const CharType* end, const CharType** tokenEnd, const char* token)
{
    // Advance both cursors only after the current characters have been found
    // equal. Incrementing as part of the comparison would step |token| onto
    // its terminator even when the final character mismatches, which would
    // make inputs such as "nulx" pass for "null".
    while (start < end && *token != '\0' && *start == *token) {
        ++start;
        ++token;
    }
    // Anything left in |token| means the input ended early or diverged.
    if (*token != '\0')
        return false;
    *tokenEnd = start;
    return true;
}
| 71 |
// Consumes a non-empty run of decimal digits at the beginning of
// [start, end).
//
// When |canHaveLeadingZeros| is false, a run longer than one digit that begins
// with '0' is rejected. On success |*tokenEnd| receives the position just past
// the last digit; on failure it is left untouched.
template<typename CharType>
bool readInt(const CharType* start, const CharType* end, const CharType** tokenEnd, bool canHaveLeadingZeros)
{
    if (start == end)
        return false;

    const CharType* const firstDigit = start;
    int digitCount = 0;
    for (; start < end && *start >= '0' && *start <= '9'; ++start)
        ++digitCount;

    if (digitCount == 0)
        return false;

    const bool startsWithZero = *firstDigit == '0';
    if (startsWithZero && digitCount > 1 && !canHaveLeadingZeros)
        return false;

    *tokenEnd = start;
    return true;
}
| 90 |
// Recognizes a JSON number at the beginning of [start, end) using the
// RFC 4627 shape: [minus] int [frac] [exp].
//
// Only the extent of the token is determined here; the numeric value is
// converted later by the caller. On success |*tokenEnd| receives the position
// just past the number; on failure it is left untouched.
template<typename CharType>
bool parseNumberToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    if (start == end)
        return false;

    const CharType* cursor = start;

    // Optional sign.
    if (*cursor == '-')
        ++cursor;

    // Mandatory integer part; redundant leading zeros are not allowed.
    if (!readInt(cursor, end, &cursor, false))
        return false;

    // Optional fraction part: '.' followed by at least one digit.
    if (cursor < end && *cursor == '.') {
        ++cursor;
        if (!readInt(cursor, end, &cursor, true))
            return false;
    }

    // Optional exponent part: 'e' or 'E', an optional sign, then digits.
    if (cursor < end && (*cursor == 'e' || *cursor == 'E')) {
        ++cursor;
        if (cursor == end)
            return false;
        if (*cursor == '-' || *cursor == '+')
            ++cursor;
        // readInt() itself fails on an empty range, which covers input that
        // ends right after the exponent sign.
        if (!readInt(cursor, end, &cursor, true))
            return false;
    }

    *tokenEnd = cursor;
    return true;
}
| 140 |
// Requires exactly |digits| hexadecimal characters (0-9, a-f, A-F) at the
// beginning of [start, end).
//
// On success |*tokenEnd| receives the position just past the last digit; on
// failure it is left untouched.
template<typename CharType>
bool readHexDigits(const CharType* start, const CharType* end, const CharType** tokenEnd, int digits)
{
    // Not enough input left to hold the requested number of digits.
    if (end - start < digits)
        return false;

    for (int remaining = digits; remaining > 0; --remaining, ++start) {
        const CharType ch = *start;
        const bool isDecimal = ch >= '0' && ch <= '9';
        const bool isLowerHex = ch >= 'a' && ch <= 'f';
        const bool isUpperHex = ch >= 'A' && ch <= 'F';
        if (!isDecimal && !isLowerHex && !isUpperHex)
            return false;
    }

    *tokenEnd = start;
    return true;
}
| 154 |
// Scans the body of a string literal. |start| must point just past the opening
// double quote.
//
// Every escape sequence is validated: \x must be followed by two hex digits,
// \u by four, and the single-character escapes accepted are
// \\ \/ \b \f \n \r \t \v \". On success |*tokenEnd| receives the position
// just past the closing quote. Returns false, leaving |*tokenEnd| untouched,
// when an escape is invalid or the input ends before the closing quote.
template<typename CharType>
bool parseStringToken(const CharType* start, const CharType* end, const CharType** tokenEnd)
{
    while (start < end) {
        CharType c = *start++;
        if ('\\' == c) {
            // A backslash that is the last character of the input has no
            // escaped character to read; reading it anyway would go past the
            // end of the buffer.
            if (start == end)
                return false;
            c = *start++;
            // Make sure the escaped char is valid.
            switch (c) {
            case 'x':
                if (!readHexDigits(start, end, &start, 2))
                    return false;
                break;
            case 'u':
                if (!readHexDigits(start, end, &start, 4))
                    return false;
                break;
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'v':
            case '"':
                break;
            default:
                return false;
            }
        } else if ('"' == c) {
            *tokenEnd = start;
            return true;
        }
    }
    // Ran out of input without seeing the closing quote.
    return false;
}
| 192 |
| 193 template<typename CharType> |
| 194 Token parseToken(const CharType* start, const CharType* end, const CharType** to
kenStart, const CharType** tokenEnd) |
| 195 { |
| 196 while (start < end && isSpaceOrNewline(*start)) |
| 197 ++start; |
| 198 |
| 199 if (start == end) |
| 200 return InvalidToken; |
| 201 |
| 202 *tokenStart = start; |
| 203 |
| 204 switch (*start) { |
| 205 case 'n': |
| 206 if (parseConstToken(start, end, tokenEnd, nullString)) |
| 207 return NullToken; |
| 208 break; |
| 209 case 't': |
| 210 if (parseConstToken(start, end, tokenEnd, trueString)) |
| 211 return BoolTrue; |
| 212 break; |
| 213 case 'f': |
| 214 if (parseConstToken(start, end, tokenEnd, falseString)) |
| 215 return BoolFalse; |
| 216 break; |
| 217 case '[': |
| 218 *tokenEnd = start + 1; |
| 219 return ArrayBegin; |
| 220 case ']': |
| 221 *tokenEnd = start + 1; |
| 222 return ArrayEnd; |
| 223 case ',': |
| 224 *tokenEnd = start + 1; |
| 225 return ListSeparator; |
| 226 case '{': |
| 227 *tokenEnd = start + 1; |
| 228 return ObjectBegin; |
| 229 case '}': |
| 230 *tokenEnd = start + 1; |
| 231 return ObjectEnd; |
| 232 case ':': |
| 233 *tokenEnd = start + 1; |
| 234 return ObjectPairSeparator; |
| 235 case '0': |
| 236 case '1': |
| 237 case '2': |
| 238 case '3': |
| 239 case '4': |
| 240 case '5': |
| 241 case '6': |
| 242 case '7': |
| 243 case '8': |
| 244 case '9': |
| 245 case '-': |
| 246 if (parseNumberToken(start, end, tokenEnd)) |
| 247 return Number; |
| 248 break; |
| 249 case '"': |
| 250 if (parseStringToken(start + 1, end, tokenEnd)) |
| 251 return StringLiteral; |
| 252 break; |
| 253 } |
| 254 return InvalidToken; |
| 255 } |
| 256 |
| 257 template<typename CharType> |
| 258 inline int hexToInt(CharType c) |
| 259 { |
| 260 if ('0' <= c && c <= '9') |
| 261 return c - '0'; |
| 262 if ('A' <= c && c <= 'F') |
| 263 return c - 'A' + 10; |
| 264 if ('a' <= c && c <= 'f') |
| 265 return c - 'a' + 10; |
| 266 ASSERT_NOT_REACHED(); |
| 267 return 0; |
| 268 } |
| 269 |
| 270 template<typename CharType> |
| 271 bool decodeString(const CharType* start, const CharType* end, StringBuilder* out
put) |
| 272 { |
| 273 while (start < end) { |
| 274 UChar c = *start++; |
| 275 if ('\\' != c) { |
| 276 output->append(c); |
| 277 continue; |
| 278 } |
| 279 c = *start++; |
| 280 switch (c) { |
| 281 case '"': |
| 282 case '/': |
| 283 case '\\': |
| 284 break; |
| 285 case 'b': |
| 286 c = '\b'; |
| 287 break; |
| 288 case 'f': |
| 289 c = '\f'; |
| 290 break; |
| 291 case 'n': |
| 292 c = '\n'; |
| 293 break; |
| 294 case 'r': |
| 295 c = '\r'; |
| 296 break; |
| 297 case 't': |
| 298 c = '\t'; |
| 299 break; |
| 300 case 'v': |
| 301 c = '\v'; |
| 302 break; |
| 303 case 'x': |
| 304 c = (hexToInt(*start) << 4) + |
| 305 hexToInt(*(start + 1)); |
| 306 start += 2; |
| 307 break; |
| 308 case 'u': |
| 309 c = (hexToInt(*start) << 12) + |
| 310 (hexToInt(*(start + 1)) << 8) + |
| 311 (hexToInt(*(start + 2)) << 4) + |
| 312 hexToInt(*(start + 3)); |
| 313 start += 4; |
| 314 break; |
| 315 default: |
| 316 return false; |
| 317 } |
| 318 output->append(c); |
| 319 } |
| 320 return true; |
| 321 } |
| 322 |
| 323 template<typename CharType> |
| 324 bool decodeString(const CharType* start, const CharType* end, String* output) |
| 325 { |
| 326 if (start == end) { |
| 327 *output = ""; |
| 328 return true; |
| 329 } |
| 330 if (start > end) |
| 331 return false; |
| 332 StringBuilder buffer; |
| 333 buffer.reserveCapacity(end - start); |
| 334 if (!decodeString(start, end, &buffer)) |
| 335 return false; |
| 336 *output = buffer.toString(); |
| 337 return true; |
| 338 } |
| 339 |
| 340 template<typename CharType> |
| 341 PassRefPtr<JSONValue> buildValue(const CharType* start, const CharType* end, con
st CharType** valueTokenEnd, int depth) |
| 342 { |
| 343 if (depth > stackLimit) |
| 344 return nullptr; |
| 345 |
| 346 RefPtr<JSONValue> result; |
| 347 const CharType* tokenStart; |
| 348 const CharType* tokenEnd; |
| 349 Token token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 350 switch (token) { |
| 351 case InvalidToken: |
| 352 return nullptr; |
| 353 case NullToken: |
| 354 result = JSONValue::null(); |
| 355 break; |
| 356 case BoolTrue: |
| 357 result = JSONBasicValue::create(true); |
| 358 break; |
| 359 case BoolFalse: |
| 360 result = JSONBasicValue::create(false); |
| 361 break; |
| 362 case Number: { |
| 363 bool ok; |
| 364 double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok
); |
| 365 if (!ok) |
| 366 return nullptr; |
| 367 result = JSONBasicValue::create(value); |
| 368 break; |
| 369 } |
| 370 case StringLiteral: { |
| 371 String value; |
| 372 bool ok = decodeString(tokenStart + 1, tokenEnd - 1, &value); |
| 373 if (!ok) |
| 374 return nullptr; |
| 375 result = JSONString::create(value); |
| 376 break; |
| 377 } |
| 378 case ArrayBegin: { |
| 379 RefPtr<JSONArray> array = JSONArray::create(); |
| 380 start = tokenEnd; |
| 381 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 382 while (token != ArrayEnd) { |
| 383 RefPtr<JSONValue> arrayNode = buildValue(start, end, &tokenEnd, dept
h + 1); |
| 384 if (!arrayNode) |
| 385 return nullptr; |
| 386 array->pushValue(arrayNode); |
| 387 |
| 388 // After a list value, we expect a comma or the end of the list. |
| 389 start = tokenEnd; |
| 390 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 391 if (token == ListSeparator) { |
| 392 start = tokenEnd; |
| 393 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 394 if (token == ArrayEnd) |
| 395 return nullptr; |
| 396 } else if (token != ArrayEnd) { |
| 397 // Unexpected value after list value. Bail out. |
| 398 return nullptr; |
| 399 } |
| 400 } |
| 401 if (token != ArrayEnd) |
| 402 return nullptr; |
| 403 result = array.release(); |
| 404 break; |
| 405 } |
| 406 case ObjectBegin: { |
| 407 RefPtr<JSONObject> object = JSONObject::create(); |
| 408 start = tokenEnd; |
| 409 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 410 while (token != ObjectEnd) { |
| 411 if (token != StringLiteral) |
| 412 return nullptr; |
| 413 String key; |
| 414 if (!decodeString(tokenStart + 1, tokenEnd - 1, &key)) |
| 415 return nullptr; |
| 416 start = tokenEnd; |
| 417 |
| 418 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 419 if (token != ObjectPairSeparator) |
| 420 return nullptr; |
| 421 start = tokenEnd; |
| 422 |
| 423 RefPtr<JSONValue> value = buildValue(start, end, &tokenEnd, depth +
1); |
| 424 if (!value) |
| 425 return nullptr; |
| 426 object->setValue(key, value); |
| 427 start = tokenEnd; |
| 428 |
| 429 // After a key/value pair, we expect a comma or the end of the |
| 430 // object. |
| 431 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 432 if (token == ListSeparator) { |
| 433 start = tokenEnd; |
| 434 token = parseToken(start, end, &tokenStart, &tokenEnd); |
| 435 if (token == ObjectEnd) |
| 436 return nullptr; |
| 437 } else if (token != ObjectEnd) { |
| 438 // Unexpected value after last object value. Bail out. |
| 439 return nullptr; |
| 440 } |
| 441 } |
| 442 if (token != ObjectEnd) |
| 443 return nullptr; |
| 444 result = object.release(); |
| 445 break; |
| 446 } |
| 447 |
| 448 default: |
| 449 // We got a token that's not a value. |
| 450 return nullptr; |
| 451 } |
| 452 *valueTokenEnd = tokenEnd; |
| 453 return result.release(); |
| 454 } |
| 455 |
| 456 template<typename CharType> |
| 457 PassRefPtr<JSONValue> parseJSONInternal(const CharType* start, unsigned length) |
| 458 { |
| 459 const CharType* end = start + length; |
| 460 const CharType *tokenEnd; |
| 461 RefPtr<JSONValue> value = buildValue(start, end, &tokenEnd, 0); |
| 462 if (!value || tokenEnd != end) |
| 463 return nullptr; |
| 464 return value.release(); |
| 465 } |
| 466 |
| 467 } // anonymous namespace |
| 468 |
| 469 PassRefPtr<JSONValue> parseJSON(const String& json) |
| 470 { |
| 471 if (json.isEmpty()) |
| 472 return nullptr; |
| 473 if (json.is8Bit()) |
| 474 return parseJSONInternal(json.characters8(), json.length()); |
| 475 return parseJSONInternal(json.characters16(), json.length()); |
| 476 } |
| 477 |
| 478 } // namespace blink |
OLD | NEW |