Chromium Code Reviews| Index: base/json/json_parser.h |
| diff --git a/base/json/json_parser.h b/base/json/json_parser.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..20a2c9316a1ee361fe4cc765cad4875139e9b9b0 |
| --- /dev/null |
| +++ b/base/json/json_parser.h |
| @@ -0,0 +1,246 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef BASE_JSON_JSON_PARSER_H_ |
| +#define BASE_JSON_JSON_PARSER_H_ |
| + |
|
Mark Mentovai
2012/04/19 16:40:12
#pragma once
Robert Sesek
2012/05/03 15:34:52
Done.
|
| +#include <string> |
| + |
| +#include "base/basictypes.h" |
| +#include "base/compiler_specific.h" |
| +#include "base/json/json_reader.h" |
| +#include "base/string_piece.h" |
| + |
| +namespace base { |
| +class Value; |
| +} |
|
Mark Mentovai
2012/04/19 16:40:12
} // namespace base
Robert Sesek
2012/05/03 15:34:52
I don't do this for forward declares.
|
| + |
| +// Chromium and Chromium OS check out gtest to different places, so we're |
|
Mark Mentovai
2012/04/19 16:40:12
Who are “we?”
Robert Sesek
2012/05/03 15:34:52
This was copied… done.
|
| +// unable to compile on both if we include gtest_prod.h here. Instead, include |
|
Mark Mentovai
2012/04/19 16:40:12
Isn’t there some macro that’s set if you’re buildi
Robert Sesek
2012/05/03 15:34:52
No idea. I couldn't find one.
|
| +// its only contents -- this will need to be updated if the macro ever changes. |
| +#define FRIEND_TEST(test_case_name, test_name)\ |
| +friend class test_case_name##_##test_name##_Test |
| + |
| +#define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ |
| + FRIEND_TEST(test_case_name, test_name); \ |
| + FRIEND_TEST(test_case_name, DISABLED_##test_name); \ |
| + FRIEND_TEST(test_case_name, FLAKY_##test_name); \ |
| + FRIEND_TEST(test_case_name, FAILS_##test_name) |
| + |
| +namespace base { |
| +namespace internal { |
| + |
| +class JSONParserTest; |
| + |
| +// The implementation behind the JSONReader interface. This class is not meant |
| +// to be used directly; it encapsulates logic that need not be exposed publicly. |
| +// |
| +// This parser guarantees O(n) time through the input string. Where possible, it |
| +// also avoids string copies by returning base::StringValue objects backed by a |
| +// StringPiece, using "hidden roots," discussed in the implementation. |
| +// |
| +// Iteration happens on the byte level, with the functions CanConsume and |
| +// NextChar. The conversion from byte to JSON token happens without advancing |
| +// the parser in GetNextToken/ParseToken; that is, tokenization operates on |
|
Mark Mentovai
2012/04/19 16:40:12
The “, that is that” construction is hard to read.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| +// the current parser position without advancing. |
| +// |
| +// Built on top of these are a family of Consume functions that iterate |
| +// internally. Invariant: on entry of a Consume function, the parser is wound |
| +// to the first byte of a valid JSON token. On exit, it is on the last byte |
| +// of a token, such that the next iteration of the parser will be at the byte |
|
Mark Mentovai
2012/04/19 16:40:12
Nit: “iteration,” not “loop.”
Robert Sesek
2012/05/03 15:34:52
Done.
|
| +// immediately following the token, which would likely be the first byte of the |
| +// next token. |
| +class JSONParser { |
| + public: |
| + explicit JSONParser(int options); |
| + virtual ~JSONParser(); |
|
tfarina
2012/04/19 22:48:18
nit: I think this doesn't need to be virtual.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + |
| + // Parses the input string according to the set options and returns the |
| + // result as a Value owned by the caller. |
| + Value* Parse(const std::string& input); |
| + |
| + // Returns the error code. |
| + JSONReader::JsonParseError error_code() const; |
| + |
| + // Returns the human-friendly error message. |
| + std::string GetErrorMessage() const; |
| + |
| + private: |
| + enum Token { |
| + T_OBJECT_BEGIN, // { |
| + T_OBJECT_END, // } |
| + T_ARRAY_BEGIN, // [ |
| + T_ARRAY_END, // ] |
| + T_STRING, |
| + T_NUMBER, |
| + T_BOOL_TRUE, // true |
| + T_BOOL_FALSE, // false |
| + T_NULL, // null |
| + T_LIST_SEPARATOR, // , |
| + T_OBJECT_PAIR_SEPARATOR, // : |
| + T_END_OF_INPUT, |
| + T_INVALID_TOKEN, |
| + }; |
| + |
| + // A helper class used for parsing strings. One optimization performed is to |
| + // create base::Value with a StringPiece to avoid unnecessary std::string |
| + // copies. This is not possible if the input string needs to be decoded from |
| + // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped. |
| + // This class centralizes that logic. |
| + class StringBuilder { |
| + public: |
| + // Empty constructor. Used for creating a builder with which to Swap(). |
| + StringBuilder(); |
| + |
| + // |pos| is the beginning of an input string, excluding the |"|. |
| + explicit StringBuilder(const char* pos); |
| + ~StringBuilder(); |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before to make it apparent than the com
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + |
| + // Swaps the contents of |other| with this. |
| + void Swap(StringBuilder* other); |
| + |
| + // Either increases the |length_| of the string or copies the character if |
| + // the StringBuilder has been converted. |
| + void Append(const int32& c); |
| + |
| + // Appends a string to the std::string. Must be Convert()ed to use. |
| + void AppendString(const std::string& str); |
| + |
| + // Converts the builder from its default StringPiece to a full std::string, |
| + // performing a copy. |
| + void Convert(); |
| + |
| + // Returns whether the builder can be converted to a StringPiece. |
| + bool CanBeStringPiece(); |
|
Mark Mentovai
2012/04/19 16:40:12
Can this be a const function?
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + |
| + // Returns the StringPiece representation. Returns an empty piece if it |
| + // cannot be converted. |
|
Mark Mentovai
2012/04/19 16:40:12
Lines 110-111 told me that Convert() converts from
Robert Sesek
2012/05/03 15:34:52
I don't think that's what it says…
|
| + StringPiece AsStringPiece(); |
|
Mark Mentovai
2012/04/19 16:40:12
Can this return a const ref?
Robert Sesek
2012/05/03 15:34:52
No, but StringPiece is cheap, so it's okay.
|
| + |
| + // Returns the builder as a std::string. |
| + std::string AsString(); |
|
Mark Mentovai
2012/04/19 16:40:12
Can this return a const ref too?
Robert Sesek
2012/05/03 15:34:52
Yes.
|
| + |
| + private: |
| + // The beginning of the input string. |
| + const char* pos_; |
| + // Length, in bytes, of the string that begins at |pos_|. |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before this.
Mark Mentovai
2012/04/19 16:40:12
I don’t know for sure what you mean by “compose it
Robert Sesek
2012/05/03 15:34:52
Done.
Robert Sesek
2012/05/03 15:34:52
Done.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + size_t length_; |
| + // The copied string representation. NULL until Convert() is called. |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before this too.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + // Strong. scoped_ptr<T> has too much of an overhead here. |
| + std::string* string_; |
| + }; |
| + |
| + // Quick check that the stream has at least |length| more bytes to consume. |
|
Mark Mentovai
2012/04/19 16:40:12
The stream has enough…what?
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + inline bool CanConsume(int length); |
|
Mark Mentovai
2012/04/19 16:40:12
Get rid of the “inline”.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + |
| + // The basic way to consume a single character in the stream. Consumes one |
| + // byte of the input stream and returns a pointer to the rest of it. |
| + const char* NextChar(); |
| + // Performs the equivalent of NextChar N times. |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + void NextNChars(int n); |
| + |
| + // Skips over whitespace and comments to find the next token in the stream. |
| + // The parser is advanced only past whitespace and comments, not past the token. |
| + Token GetNextToken(); |
| + |
| + // Consumes whitespace characters and comments until some other character is |
|
Mark Mentovai
2012/04/19 16:40:12
😃
|
| + // encountered. |
| + void EatWhitespaceAndComments(); |
| + // Helper function that consumes a comment, assuming that the parser is |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before.
Robert Sesek
2012/05/03 15:34:52
This and the remaining instances are done delibera
|
| + // currently wound to a '/'. |
| + bool EatComment(); |
| + |
| + // Calls GetNextToken() and then ParseToken(). Caller owns the result. |
| + Value* ParseNextToken(); |
| + |
| + // Takes a token that represents the start of a Value ("a structural token" |
| + // in RFC terms) and consumes it, returning the result as an object the |
| + // caller owns. |
| + Value* ParseToken(Token token); |
| + |
| + // Assuming that the parser is currently wound to '{', this parses a JSON |
| + // object into a DictionaryValue. |
| + Value* ConsumeDictionary(); |
| + |
| + // Assuming that the parser is wound to '[', this parses a JSON list into a |
| + // ListValue. |
| + Value* ConsumeList(); |
| + |
| + // Calls through ConsumeStringRaw and wraps it in a value. |
| + Value* ConsumeString(); |
| + |
| + // Assuming that the parser is wound to a double quote, this parses a string, |
| + // potentially performing a UTF-16 to UTF-8 conversion. Returns true on |
|
Mark Mentovai
2012/04/19 16:40:12
What’s the “potential?” When does it and when does
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + // success and Swap()s the result into |out|. Returns false on failure with |
| + // error information set. |
| + bool ConsumeStringRaw(StringBuilder* out); |
| + // Helper function for ConsumeStringRaw() that consumes the next four or 10 |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before.
Mark Mentovai
2012/04/19 16:40:12
Four OR ten, not four TO ten, right?
Robert Sesek
2012/05/03 15:34:52
Done.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + // bytes (parser is wound to the first character of a HEX sequence, with the |
| + // potential for consuming another \uXXXX for a surrogate). Returns true on |
| + // success and places the UTF8 code units in |dest_string|, and false on |
| + // failure. |
| + bool DecodeUTF16(std::string* dest_string); |
| + |
| + // Assuming that the parser is wound to the start of a valid JSON number, |
| + // this parses and converts it to either an int or double value. |
| + Value* ConsumeNumber(); |
| + // Helper that reads characters that are ints. Returns true if a number was |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before.
|
| + // read and false on error. |
| + bool ReadInt(bool allow_leading_zeros); |
| + |
| + // Consumes the literal values of |true|, |false|, and |null|, assuming the |
| + // parser is wound to the first character of any of those. |
| + Value* ConsumeLiteral(); |
| + |
| + // Compares two string buffers of a given length. |
| + bool StringsAreEqual(const char* left, const char* right, size_t len); |
|
Mark Mentovai
2012/04/19 16:40:12
This one seems like it could be static.
It may no
Robert Sesek
2012/05/03 15:34:52
Made static. Considering this entire file is an im
|
| + |
| + // Sets the error information to |code| at the current column, based on |
| + // |index_| and |index_last_line_|, with an optional positive/negative |
| + // adjustment by |column_adjust|. |
| + void ReportError(JSONReader::JsonParseError code, int column_adjust); |
| + static std::string FormatErrorMessage(int line, int column, |
|
Mark Mentovai
2012/04/19 16:40:12
What does this do?
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + const std::string& description); |
| + |
| + // Options that control parsing. |
|
Mark Mentovai
2012/04/19 16:40:12
What values might this have? Oh, base::JSONParserO
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + int options_; |
| + |
| + // Pointer to the start of the input data. |
| + const char* start_pos_; |
| + // Pointer to the current position in the input data. Equivalent to |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before this, and 214, and 216.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + // |start_pos_ + index_|. |
|
Mark Mentovai
2012/04/19 16:40:12
If this is equivalent to something else that’s alr
Robert Sesek
2012/05/03 15:34:52
Because both are checked frequently and independen
|
| + const char* pos_; |
| + // Pointer to the last character of the input data. |
| + const char* end_pos_; |
| + // The index in the input stream to which the parser is wound. |
| + int index_; |
| + |
| + // The number of times the parser has recursed (current stack depth). |
| + int stack_depth_; |
| + |
| + // The line number that the parser is at currently. |
| + int line_number_; |
| + // The last value of |index_| on the previous line. |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| + int index_last_line_; |
| + |
| + // Error information. |
| + JSONReader::JsonParseError error_code_; |
| + int error_line_; |
| + int error_column_; |
| + |
| + friend class JSONParserTest; |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar); |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary); |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList); |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString); |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals); |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers); |
| + FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages); |
| + DISALLOW_COPY_AND_ASSIGN(JSONParser); |
|
Mark Mentovai
2012/04/19 16:40:12
Blank line before.
Robert Sesek
2012/05/03 15:34:52
Done.
|
| +}; |
| + |
| +} // namespace internal |
| +} // namespace base |
| + |
| +#endif // BASE_JSON_JSON_PARSER_H_ |