base/json_reader.cc - Issue 316016: Move the json-related files into a separate json directory. This hopefully al...

Side by Side Diff: base/json_reader.cc

Issue 316016: Move the json-related files into a separate json directory. This hopefully al... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 11 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "base/json_reader.h"

6

7 #include "base/float_util.h"

8 #include "base/logging.h"

9 #include "base/scoped_ptr.h"

10 #include "base/string_util.h"

11 #include "base/utf_string_conversions.h"

12 #include "base/values.h"

13

14 static const JSONReader::Token kInvalidToken(JSONReader::Token::INVALID_TOKEN,

15 0, 0);

16 static const int kStackLimit = 100;

17

18 namespace {

19

20 inline int HexToInt(wchar_t c) {

21 if ('0' <= c && c <= '9') {

22 return c - '0';

23 } else if ('A' <= c && c <= 'F') {

24 return c - 'A' + 10;

25 } else if ('a' <= c && c <= 'f') {

26 return c - 'a' + 10;

27 }

28 NOTREACHED();

29 return 0;

30 }

31

32 // A helper method for ParseNumberToken. It reads an int from the end of

33 // token. The method returns false if there is no valid integer at the end of

34 // the token.

35 bool ReadInt(JSONReader::Token& token, bool can_have_leading_zeros) {

36 wchar_t first = token.NextChar();

37 int len = 0;

38

39 // Read in more digits

40 wchar_t c = first;

41 while ('\0' != c && '0' <= c && c <= '9') {

42 ++token.length;

43 ++len;

44 c = token.NextChar();

45 }

46 // We need at least 1 digit.

47 if (len == 0)

48 return false;

49

50 if (!can_have_leading_zeros && len > 1 && '0' == first)

51 return false;

52

53 return true;

54 }

55

56 // A helper method for ParseStringToken. It reads \|digits\| hex digits from the

57 // token. If the sequence if digits is not valid (contains other characters),

58 // the method returns false.

59 bool ReadHexDigits(JSONReader::Token& token, int digits) {

60 for (int i = 1; i <= digits; ++i) {

61 wchar_t c = *(token.begin + token.length + i);

62 if ('\0' == c)

63 return false;

64 if (!(('0' <= c && c <= '9') \|\| ('a' <= c && c <= 'f') \|\|

65 ('A' <= c && c <= 'F'))) {

66 return false;

67 }

68 }

69

70 token.length += digits;

71 return true;

72 }

73

74 } // anonymous namespace

75

76 const char* JSONReader::kBadRootElementType =

77 "Root value must be an array or object.";

78 const char* JSONReader::kInvalidEscape =

79 "Invalid escape sequence.";

80 const char* JSONReader::kSyntaxError =

81 "Syntax error.";

82 const char* JSONReader::kTrailingComma =

83 "Trailing comma not allowed.";

84 const char* JSONReader::kTooMuchNesting =

85 "Too much nesting.";

86 const char* JSONReader::kUnexpectedDataAfterRoot =

87 "Unexpected data after root element.";

88 const char* JSONReader::kUnsupportedEncoding =

89 "Unsupported encoding. JSON must be UTF-8.";

90 const char* JSONReader::kUnquotedDictionaryKey =

91 "Dictionary keys must be quoted.";

92

93 /* static */

94 Value* JSONReader::Read(const std::string& json,

95 bool allow_trailing_comma) {

96 return ReadAndReturnError(json, allow_trailing_comma, NULL);

97 }

98

99 /* static */

100 Value* JSONReader::ReadAndReturnError(const std::string& json,

101 bool allow_trailing_comma,

102 std::string *error_message_out) {

103 JSONReader reader = JSONReader();

104 Value* root = reader.JsonToValue(json, true, allow_trailing_comma);

105 if (root)

106 return root;

107

108 if (error_message_out)

109 *error_message_out = reader.error_message();

110

111 return NULL;

112 }

113

114 /* static */

115 std::string JSONReader::FormatErrorMessage(int line, int column,

116 const char* description) {

117 return StringPrintf("Line: %i, column: %i, %s",

118 line, column, description);

119 }

120

121 JSONReader::JSONReader()

122 : start_pos_(NULL), json_pos_(NULL), stack_depth_(0),

123 allow_trailing_comma_(false) {}

124

125 Value* JSONReader::JsonToValue(const std::string& json, bool check_root,

126 bool allow_trailing_comma) {

127 // The input must be in UTF-8.

128 if (!IsStringUTF8(json.c_str())) {

129 error_message_ = kUnsupportedEncoding;

130 return NULL;

131 }

132

133 // The conversion from UTF8 to wstring removes null bytes for us

134 // (a good thing).

135 std::wstring json_wide(UTF8ToWide(json));

136 start_pos_ = json_wide.c_str();

137

138 // When the input JSON string starts with a UTF-8 Byte-Order-Mark

139 // (0xEF, 0xBB, 0xBF), the UTF8ToWide() function converts it to a Unicode

140 // BOM (U+FEFF). To avoid the JSONReader::BuildValue() function from

141 // mis-treating a Unicode BOM as an invalid character and returning NULL,

142 // skip a converted Unicode BOM if it exists.

143 if (!json_wide.empty() && start_pos_[0] == 0xFEFF) {

144 ++start_pos_;

145 }

146

147 json_pos_ = start_pos_;

148 allow_trailing_comma_ = allow_trailing_comma;

149 stack_depth_ = 0;

150 error_message_.clear();

151

152 scoped_ptr<Value> root(BuildValue(check_root));

153 if (root.get()) {

154 if (ParseToken().type == Token::END_OF_INPUT) {

155 return root.release();

156 } else {

157 SetErrorMessage(kUnexpectedDataAfterRoot, json_pos_);

158 }

159 }

160

161 // Default to calling errors "syntax errors".

162 if (error_message_.empty())

163 SetErrorMessage(kSyntaxError, json_pos_);

164

165 return NULL;

166 }

167

168 Value* JSONReader::BuildValue(bool is_root) {

169 ++stack_depth_;

170 if (stack_depth_ > kStackLimit) {

171 SetErrorMessage(kTooMuchNesting, json_pos_);

172 return NULL;

173 }

174

175 Token token = ParseToken();

176 // The root token must be an array or an object.

177 if (is_root && token.type != Token::OBJECT_BEGIN &&

178 token.type != Token::ARRAY_BEGIN) {

179 SetErrorMessage(kBadRootElementType, json_pos_);

180 return NULL;

181 }

182

183 scoped_ptr<Value> node;

184

185 switch (token.type) {

186 case Token::END_OF_INPUT:

187 case Token::INVALID_TOKEN:

188 return NULL;

189

190 case Token::NULL_TOKEN:

191 node.reset(Value::CreateNullValue());

192 break;

193

194 case Token::BOOL_TRUE:

195 node.reset(Value::CreateBooleanValue(true));

196 break;

197

198 case Token::BOOL_FALSE:

199 node.reset(Value::CreateBooleanValue(false));

200 break;

201

202 case Token::NUMBER:

203 node.reset(DecodeNumber(token));

204 if (!node.get())

205 return NULL;

206 break;

207

208 case Token::STRING:

209 node.reset(DecodeString(token));

210 if (!node.get())

211 return NULL;

212 break;

213

214 case Token::ARRAY_BEGIN:

215 {

216 json_pos_ += token.length;

217 token = ParseToken();

218

219 node.reset(new ListValue());

220 while (token.type != Token::ARRAY_END) {

221 Value* array_node = BuildValue(false);

222 if (!array_node)

223 return NULL;

224 static_cast<ListValue*>(node.get())->Append(array_node);

225

226 // After a list value, we expect a comma or the end of the list.

227 token = ParseToken();

228 if (token.type == Token::LIST_SEPARATOR) {

229 json_pos_ += token.length;

230 token = ParseToken();

231 // Trailing commas are invalid according to the JSON RFC, but some

232 // consumers need the parsing leniency, so handle accordingly.

233 if (token.type == Token::ARRAY_END) {

234 if (!allow_trailing_comma_) {

235 SetErrorMessage(kTrailingComma, json_pos_);

236 return NULL;

237 }

238 // Trailing comma OK, stop parsing the Array.

239 break;

240 }

241 } else if (token.type != Token::ARRAY_END) {

242 // Unexpected value after list value. Bail out.

243 return NULL;

244 }

245 }

246 if (token.type != Token::ARRAY_END) {

247 return NULL;

248 }

249 break;

250 }

251

252 case Token::OBJECT_BEGIN:

253 {

254 json_pos_ += token.length;

255 token = ParseToken();

256

257 node.reset(new DictionaryValue);

258 while (token.type != Token::OBJECT_END) {

259 if (token.type != Token::STRING) {

260 SetErrorMessage(kUnquotedDictionaryKey, json_pos_);

261 return NULL;

262 }

263 scoped_ptr<Value> dict_key_value(DecodeString(token));

264 if (!dict_key_value.get())

265 return NULL;

266

267 // Convert the key into a wstring.

268 std::wstring dict_key;

269 bool success = dict_key_value->GetAsString(&dict_key);

270 DCHECK(success);

271

272 json_pos_ += token.length;

273 token = ParseToken();

274 if (token.type != Token::OBJECT_PAIR_SEPARATOR)

275 return NULL;

276

277 json_pos_ += token.length;

278 token = ParseToken();

279 Value* dict_value = BuildValue(false);

280 if (!dict_value)

281 return NULL;

282 static_cast<DictionaryValue*>(node.get())->Set(dict_key, dict_value);

283

284 // After a key/value pair, we expect a comma or the end of the

285 // object.

286 token = ParseToken();

287 if (token.type == Token::LIST_SEPARATOR) {

288 json_pos_ += token.length;

289 token = ParseToken();

290 // Trailing commas are invalid according to the JSON RFC, but some

291 // consumers need the parsing leniency, so handle accordingly.

292 if (token.type == Token::OBJECT_END) {

293 if (!allow_trailing_comma_) {

294 SetErrorMessage(kTrailingComma, json_pos_);

295 return NULL;

296 }

297 // Trailing comma OK, stop parsing the Object.

298 break;

299 }

300 } else if (token.type != Token::OBJECT_END) {

301 // Unexpected value after last object value. Bail out.

302 return NULL;

303 }

304 }

305 if (token.type != Token::OBJECT_END)

306 return NULL;

307

308 break;

309 }

310

311 default:

312 // We got a token that's not a value.

313 return NULL;

314 }

315 json_pos_ += token.length;

316

317 --stack_depth_;

318 return node.release();

319 }

320

321 JSONReader::Token JSONReader::ParseNumberToken() {

322 // We just grab the number here. We validate the size in DecodeNumber.

323 // According to RFC4627, a valid number is: [minus] int [frac] [exp]

324 Token token(Token::NUMBER, json_pos_, 0);

325 wchar_t c = *json_pos_;

326 if ('-' == c) {

327 ++token.length;

328 c = token.NextChar();

329 }

330

331 if (!ReadInt(token, false))

332 return kInvalidToken;

333

334 // Optional fraction part

335 c = token.NextChar();

336 if ('.' == c) {

337 ++token.length;

338 if (!ReadInt(token, true))

339 return kInvalidToken;

340 c = token.NextChar();

341 }

342

343 // Optional exponent part

344 if ('e' == c \|\| 'E' == c) {

345 ++token.length;

346 c = token.NextChar();

347 if ('-' == c \|\| '+' == c) {

348 ++token.length;

349 c = token.NextChar();

350 }

351 if (!ReadInt(token, true))

352 return kInvalidToken;

353 }

354

355 return token;

356 }

357

358 Value* JSONReader::DecodeNumber(const Token& token) {

359 const std::wstring num_string(token.begin, token.length);

360

361 int num_int;

362 if (StringToInt(WideToUTF16Hack(num_string), &num_int))

363 return Value::CreateIntegerValue(num_int);

364

365 double num_double;

366 if (StringToDouble(WideToUTF16Hack(num_string), &num_double) &&

367 base::IsFinite(num_double))

368 return Value::CreateRealValue(num_double);

369

370 return NULL;

371 }

372

373 JSONReader::Token JSONReader::ParseStringToken() {

374 Token token(Token::STRING, json_pos_, 1);

375 wchar_t c = token.NextChar();

376 while ('\0' != c) {

377 if ('\\' == c) {

378 ++token.length;

379 c = token.NextChar();

380 // Make sure the escaped char is valid.

381 switch (c) {

382 case 'x':

383 if (!ReadHexDigits(token, 2)) {

384 SetErrorMessage(kInvalidEscape, json_pos_ + token.length);

385 return kInvalidToken;

386 }

387 break;

388 case 'u':

389 if (!ReadHexDigits(token, 4)) {

390 SetErrorMessage(kInvalidEscape, json_pos_ + token.length);

391 return kInvalidToken;

392 }

393 break;

394 case '\\':

395 case '/':

396 case 'b':

397 case 'f':

398 case 'n':

399 case 'r':

400 case 't':

401 case 'v':

402 case '"':

403 break;

404 default:

405 SetErrorMessage(kInvalidEscape, json_pos_ + token.length);

406 return kInvalidToken;

407 }

408 } else if ('"' == c) {

409 ++token.length;

410 return token;

411 }

412 ++token.length;

413 c = token.NextChar();

414 }

415 return kInvalidToken;

416 }

417

418 Value* JSONReader::DecodeString(const Token& token) {

419 std::wstring decoded_str;

420 decoded_str.reserve(token.length - 2);

421

422 for (int i = 1; i < token.length - 1; ++i) {

423 wchar_t c = *(token.begin + i);

424 if ('\\' == c) {

425 ++i;

426 c = *(token.begin + i);

427 switch (c) {

428 case '"':

429 case '/':

430 case '\\':

431 decoded_str.push_back(c);

432 break;

433 case 'b':

434 decoded_str.push_back('\b');

435 break;

436 case 'f':

437 decoded_str.push_back('\f');

438 break;

439 case 'n':

440 decoded_str.push_back('\n');

441 break;

442 case 'r':

443 decoded_str.push_back('\r');

444 break;

445 case 't':

446 decoded_str.push_back('\t');

447 break;

448 case 'v':

449 decoded_str.push_back('\v');

450 break;

451

452 case 'x':

453 decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 4) +

454 HexToInt(*(token.begin + i + 2)));

455 i += 2;

456 break;

457 case 'u':

458 decoded_str.push_back((HexToInt(*(token.begin + i + 1)) << 12 ) +

459 (HexToInt(*(token.begin + i + 2)) << 8) +

460 (HexToInt(*(token.begin + i + 3)) << 4) +

461 HexToInt(*(token.begin + i + 4)));

462 i += 4;

463 break;

464

465 default:

466 // We should only have valid strings at this point. If not,

467 // ParseStringToken didn't do it's job.

468 NOTREACHED();

469 return NULL;

470 }

471 } else {

472 // Not escaped

473 decoded_str.push_back(c);

474 }

475 }

476 return Value::CreateStringValue(decoded_str);

477 }

478

479 JSONReader::Token JSONReader::ParseToken() {

480 static const std::wstring kNullString(L"null");

481 static const std::wstring kTrueString(L"true");

482 static const std::wstring kFalseString(L"false");

483

484 EatWhitespaceAndComments();

485

486 Token token(Token::INVALID_TOKEN, 0, 0);

487 switch (*json_pos_) {

488 case '\0':

489 token.type = Token::END_OF_INPUT;

490 break;

491

492 case 'n':

493 if (NextStringMatch(kNullString))

494 token = Token(Token::NULL_TOKEN, json_pos_, 4);

495 break;

496

497 case 't':

498 if (NextStringMatch(kTrueString))

499 token = Token(Token::BOOL_TRUE, json_pos_, 4);

500 break;

501

502 case 'f':

503 if (NextStringMatch(kFalseString))

504 token = Token(Token::BOOL_FALSE, json_pos_, 5);

505 break;

506

507 case '[':

508 token = Token(Token::ARRAY_BEGIN, json_pos_, 1);

509 break;

510

511 case ']':

512 token = Token(Token::ARRAY_END, json_pos_, 1);

513 break;

514

515 case ',':

516 token = Token(Token::LIST_SEPARATOR, json_pos_, 1);

517 break;

518

519 case '{':

520 token = Token(Token::OBJECT_BEGIN, json_pos_, 1);

521 break;

522

523 case '}':

524 token = Token(Token::OBJECT_END, json_pos_, 1);

525 break;

526

527 case ':':

528 token = Token(Token::OBJECT_PAIR_SEPARATOR, json_pos_, 1);

529 break;

530

531 case '0':

532 case '1':

533 case '2':

534 case '3':

535 case '4':

536 case '5':

537 case '6':

538 case '7':

539 case '8':

540 case '9':

541 case '-':

542 token = ParseNumberToken();

543 break;

544

545 case '"':

546 token = ParseStringToken();

547 break;

548 }

549 return token;

550 }

551

552 bool JSONReader::NextStringMatch(const std::wstring& str) {

553 for (size_t i = 0; i < str.length(); ++i) {

554 if ('\0' == *json_pos_)

555 return false;

556 if (*(json_pos_ + i) != str[i])

557 return false;

558 }

559 return true;

560 }

561

562 void JSONReader::EatWhitespaceAndComments() {

563 while ('\0' != *json_pos_) {

564 switch (*json_pos_) {

565 case ' ':

566 case '\n':

567 case '\r':

568 case '\t':

569 ++json_pos_;

570 break;

571 case '/':

572 // TODO(tc): This isn't in the RFC so it should be a parser flag.

573 if (!EatComment())

574 return;

575 break;

576 default:

577 // Not a whitespace char, just exit.

578 return;

579 }

580 }

581 }

582

583 bool JSONReader::EatComment() {

584 if ('/' != *json_pos_)

585 return false;

586

587 wchar_t next_char = *(json_pos_ + 1);

588 if ('/' == next_char) {

589 // Line comment, read until \n or \r

590 json_pos_ += 2;

591 while ('\0' != *json_pos_) {

592 switch (*json_pos_) {

593 case '\n':

594 case '\r':

595 ++json_pos_;

596 return true;

597 default:

598 ++json_pos_;

599 }

600 }

601 } else if ('*' == next_char) {

602 // Block comment, read until */

603 json_pos_ += 2;

604 while ('\0' != *json_pos_) {

605 if ('' == json_pos_ && '/' == *(json_pos_ + 1)) {

606 json_pos_ += 2;

607 return true;

608 }

609 ++json_pos_;

610 }

611 } else {

612 return false;

613 }

614 return true;

615 }

616

617 void JSONReader::SetErrorMessage(const char* description,

618 const wchar_t* error_pos) {

619 int line_number = 1;

620 int column_number = 1;

621

622 // Figure out the line and column the error occured at.

623 for (const wchar_t* pos = start_pos_; pos != error_pos; ++pos) {

624 if (*pos == '\0') {

625 NOTREACHED();

626 return;

627 }

628

629 if (*pos == '\n') {

630 ++line_number;

631 column_number = 1;

632 } else {

633 ++column_number;

634 }

635 }

636

637 error_message_ = FormatErrorMessage(line_number, column_number, description);

638 }

OLD	NEW

« no previous file with comments | « base/json_reader.h ('k') | base/json_reader_unittest.cc » ('j') | no next file with comments »