| OLD | NEW |
| (Empty) | |
| 1 // Protocol Buffers - Google's data interchange format |
| 2 // Copyright 2008 Google Inc. All rights reserved. |
| 3 // https://developers.google.com/protocol-buffers/ |
| 4 // |
| 5 // Redistribution and use in source and binary forms, with or without |
| 6 // modification, are permitted provided that the following conditions are |
| 7 // met: |
| 8 // |
| 9 // * Redistributions of source code must retain the above copyright |
| 10 // notice, this list of conditions and the following disclaimer. |
| 11 // * Redistributions in binary form must reproduce the above |
| 12 // copyright notice, this list of conditions and the following disclaimer |
| 13 // in the documentation and/or other materials provided with the |
| 14 // distribution. |
| 15 // * Neither the name of Google Inc. nor the names of its |
| 16 // contributors may be used to endorse or promote products derived from |
| 17 // this software without specific prior written permission. |
| 18 // |
| 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 |
| 31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ |
| 32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ |
| 33 |
| 34 #include <stack> |
| 35 #include <string> |
| 36 |
| 37 #include <google/protobuf/stubs/common.h> |
| 38 #include <google/protobuf/stubs/stringpiece.h> |
| 39 #include <google/protobuf/stubs/status.h> |
| 40 |
| 41 namespace google { |
| 42 namespace util { |
| 43 class Status; |
| 44 } // namespace util |
| 45 |
| 46 namespace protobuf { |
| 47 namespace util { |
| 48 namespace converter { |
| 49 |
| 50 class ObjectWriter; |
| 51 |
| 52 // A JSON parser that can parse a stream of JSON chunks rather than needing the |
| 53 // entire JSON string up front. It is a modified version of the parser in |
| 54 // //net/proto/json/json-parser.h that has been changed in the following ways: |
| 55 // - Changed from recursion to an explicit stack to allow resumption |
| 56 // - Added support for int64 and uint64 numbers |
| 57 // - Removed support for octal and decimal escapes |
| 58 // - Removed support for numeric keys |
| 59 // - Removed support for functions (javascript) |
| 60 // - Removed some lax-comma support (but kept trailing comma support) |
| 61 // - Writes directly to an ObjectWriter rather than using subclassing |
| 62 // |
| 63 // Here is an example usage: |
| 64 // JsonStreamParser parser(ow_.get()); |
| 65 // util::Status result = parser.Parse(chunk1); |
| 66 // result.Update(parser.Parse(chunk2)); |
| 67 // result.Update(parser.FinishParse()); |
| 68 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON"; |
| 69 // |
| 70 // This parser is thread-compatible as long as only one thread is calling a |
| 71 // Parse() method at a time. |
| 72 class LIBPROTOBUF_EXPORT JsonStreamParser { |
| 73 public: |
| 74 // Creates a JsonStreamParser that will write to the given ObjectWriter. |
| 75 explicit JsonStreamParser(ObjectWriter* ow); |
| 76 virtual ~JsonStreamParser(); |
| 77 |
| 78 // Parses one UTF-8 encoded chunk of JSON; may be called once per chunk. |
| 79 util::Status Parse(StringPiece json); |
| 80 |
| 81 |
| 82 // Finishes parsing the JSON string; call after the last Parse() call. |
| 83 util::Status FinishParse(); |
| 84 |
| 85 |
| 86 private: |
| 87 enum TokenType { |
| 88 BEGIN_STRING, // " or ' |
| 89 BEGIN_NUMBER, // - or digit |
| 90 BEGIN_TRUE, // true |
| 91 BEGIN_FALSE, // false |
| 92 BEGIN_NULL, // null |
| 93 BEGIN_OBJECT, // { |
| 94 END_OBJECT, // } |
| 95 BEGIN_ARRAY, // [ |
| 96 END_ARRAY, // ] |
| 97 ENTRY_SEPARATOR, // : |
| 98 VALUE_SEPARATOR, // , |
| 99 BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit |
| 100 UNKNOWN // Unknown token or we ran out of the stream. |
| 101 }; |
| 102 |
| 103 enum ParseType { |
| 104 VALUE, // Expects a {, [, true, false, null, string or number |
| 105 OBJ_MID, // Expects a ',' or } |
| 106 ENTRY, // Expects a key or } |
| 107 ENTRY_MID, // Expects a : |
| 108 ARRAY_VALUE, // Expects a value or ] |
| 109 ARRAY_MID // Expects a ',' or ] |
| 110 }; |
| 111 |
| 112 // Holds the result of parsing a number; type says which union member is set. |
| 113 struct NumberResult { |
| 114 enum Type { DOUBLE, INT, UINT }; |
| 115 Type type; |
| 116 union { |
| 117 double double_val; |
| 118 int64 int_val; |
| 119 uint64 uint_val; |
| 120 }; |
| 121 }; |
| 122 |
| 123 // Parses a single chunk of JSON, returning an error if the JSON was invalid. |
| 124 util::Status ParseChunk(StringPiece json); |
| 125 |
| 126 // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs |
| 127 // out of data. If we unexpectedly run out of p_ we push the latest entry back |
| 128 // onto the stack and return. |
| 129 util::Status RunParser(); |
| 130 |
| 131 // Parses a value from p_ and writes it to ow_. |
| 132 // A value may be an object, array, true, false, null, string or number. |
| 133 util::Status ParseValue(TokenType type); |
| 134 |
| 135 // Parses a string and writes it out to the ow_. |
| 136 util::Status ParseString(); |
| 137 |
| 138 // Parses a string, storing the result in parsed_. |
| 139 util::Status ParseStringHelper(); |
| 140 |
| 141 // This function parses unicode escape sequences in strings. It returns an |
| 142 // error when there's a parsing error, either the size is not the expected |
| 143 // size or a character is not a hex digit. On return, the output (presumably |
| 144 // parsed_storage_; TODO confirm) holds what was successfully parsed so far. |
| 145 util::Status ParseUnicodeEscape(); |
| 146 |
| 147 // Expects p_ to point to a JSON number, writes the number to the writer using |
| 148 // the appropriate Render method based on the type of number. |
| 149 util::Status ParseNumber(); |
| 150 |
| 151 // Parse a number into a NumberResult, reporting an error if no number could |
| 152 // be parsed. This method will try to parse into a uint64, int64, or double |
| 153 // based on whether the number was positive or negative or had a decimal |
| 154 // component. |
| 155 util::Status ParseNumberHelper(NumberResult* result); |
| 156 |
| 157 // Handles a { during parsing of a value. |
| 158 util::Status HandleBeginObject(); |
| 159 |
| 160 // Parses from the ENTRY state. |
| 161 util::Status ParseEntry(TokenType type); |
| 162 |
| 163 // Parses from the ENTRY_MID state. |
| 164 util::Status ParseEntryMid(TokenType type); |
| 165 |
| 166 // Parses from the OBJ_MID state. |
| 167 util::Status ParseObjectMid(TokenType type); |
| 168 |
| 169 // Handles a [ during parsing of a value. |
| 170 util::Status HandleBeginArray(); |
| 171 |
| 172 // Parses from the ARRAY_VALUE state. |
| 173 util::Status ParseArrayValue(TokenType type); |
| 174 |
| 175 // Parses from the ARRAY_MID state. |
| 176 util::Status ParseArrayMid(TokenType type); |
| 177 |
| 178 // Each expects p_ to point to the corresponding unquoted literal. |
| 179 util::Status ParseTrue(); |
| 180 util::Status ParseFalse(); |
| 181 util::Status ParseNull(); |
| 182 |
| 183 // Report a failure as a util::Status. |
| 184 util::Status ReportFailure(StringPiece message); |
| 185 |
| 186 // Report a failure due to an UNKNOWN token type. We check if we hit the |
| 187 // end of the stream and if we're finishing or not to detect what type of |
| 188 // status to return in this case. |
| 189 util::Status ReportUnknown(StringPiece message); |
| 190 |
| 191 // Advance p_ past all whitespace or until the end of the string. |
| 192 void SkipWhitespace(); |
| 193 |
| 194 // Advance p_ by one UTF-8 character. |
| 195 void Advance(); |
| 196 |
| 197 // Expects p_ to point to the beginning of a key. |
| 198 util::Status ParseKey(); |
| 199 |
| 200 // Return the type of the next token at p_. |
| 201 TokenType GetNextTokenType(); |
| 202 |
| 203 // The object writer to write parse events to. |
| 204 ObjectWriter* ow_; |
| 205 |
| 206 // The stack of parsing we still need to do. When the stack runs empty we will |
| 207 // have parsed a single value from the root (e.g. an object or list). |
| 208 std::stack<ParseType> stack_; |
| 209 |
| 210 // Contains any leftover text from a previous chunk that we weren't able to |
| 211 // fully parse, for example the start of a key or number. |
| 212 string leftover_; |
| 213 |
| 214 // The current chunk of JSON being parsed. Primarily used for providing |
| 215 // context during error reporting. |
| 216 StringPiece json_; |
| 217 |
| 218 // A pointer within the current JSON being parsed, used to track location. |
| 219 StringPiece p_; |
| 220 |
| 221 // Stores the last key read, as we separate parsing of keys and values. |
| 222 StringPiece key_; |
| 223 |
| 224 // Storage for key_ if we need to keep ownership, for example between chunks |
| 225 // or if the key was unescaped from a JSON string. |
| 226 string key_storage_; |
| 227 |
| 228 // True during the FinishParse() call, so we know that any errors are fatal. |
| 229 // For example an unterminated string will normally result in cancelling and |
| 230 // retrying during the next chunk, but during FinishParse() it is an error. |
| 231 bool finishing_; |
| 232 |
| 233 // String we parsed during a call to ParseStringHelper(). |
| 234 StringPiece parsed_; |
| 235 |
| 236 // Storage for the string we parsed. This may be empty if the string was able |
| 237 // to be parsed directly from the input. |
| 238 string parsed_storage_; |
| 239 |
| 240 // The character that opened the string, either ' or ". |
| 241 // A value of 0 indicates that string parsing is not in process. |
| 242 char string_open_; |
| 243 |
| 244 // Storage for the chunk that is being parsed in ParseChunk(). |
| 245 string chunk_storage_; |
| 246 |
| 247 // Whether to allow non UTF-8 encoded input and replace invalid code points. |
| 248 bool coerce_to_utf8_; |
| 249 |
| 250 GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser); |
| 251 }; |
| 252 |
| 253 } // namespace converter |
| 254 } // namespace util |
| 255 } // namespace protobuf |
| 256 |
| 257 } // namespace google |
| 258 #endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ |
| OLD | NEW |