Index: third_party/protobuf/src/google/protobuf/util/internal/json_stream_parser.cc |
diff --git a/third_party/protobuf/src/google/protobuf/util/internal/json_stream_parser.cc b/third_party/protobuf/src/google/protobuf/util/internal/json_stream_parser.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..df916751386848cb27cac043b9f97651c0a9dc85 |
--- /dev/null |
+++ b/third_party/protobuf/src/google/protobuf/util/internal/json_stream_parser.cc |
@@ -0,0 +1,774 @@ |
+// Protocol Buffers - Google's data interchange format |
+// Copyright 2008 Google Inc. All rights reserved. |
+// https://developers.google.com/protocol-buffers/ |
+// |
+// Redistribution and use in source and binary forms, with or without |
+// modification, are permitted provided that the following conditions are |
+// met: |
+// |
+// * Redistributions of source code must retain the above copyright |
+// notice, this list of conditions and the following disclaimer. |
+// * Redistributions in binary form must reproduce the above |
+// copyright notice, this list of conditions and the following disclaimer |
+// in the documentation and/or other materials provided with the |
+// distribution. |
+// * Neither the name of Google Inc. nor the names of its |
+// contributors may be used to endorse or promote products derived from |
+// this software without specific prior written permission. |
+// |
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+ |
+#include <google/protobuf/util/internal/json_stream_parser.h> |
+ |
+#include <algorithm> |
+#include <cctype> |
+#include <cerrno> |
+#include <cstdlib> |
+#include <cstring> |
+#include <memory> |
+#ifndef _SHARED_PTR_H |
+#include <google/protobuf/stubs/shared_ptr.h> |
+#endif |
+ |
+#include <google/protobuf/stubs/logging.h> |
+#include <google/protobuf/stubs/common.h> |
+#include <google/protobuf/stubs/strutil.h> |
+#include <google/protobuf/util/internal/object_writer.h> |
+ |
+namespace google { |
+namespace protobuf { |
+namespace util { |
+ |
+// Allow these symbols to be referenced as util::Status, util::error::* in |
+// this file. |
+using util::Status; |
+namespace error { |
+using util::error::INTERNAL; |
+using util::error::INVALID_ARGUMENT; |
+} // namespace error |
+ |
+namespace converter { |
+ |
+// Total number of characters in a unicode escape sequence (\uXXXX): the |
+// backslash, the 'u', and the four hex digits. |
+static const int kUnicodeEscapedLength = 6; |
+ |
+// Lengths of the true, false, and null literals, not counting the |
+// terminating NUL. sizeof on the literal makes these compile-time constants |
+// instead of values computed by strlen() during static initialization. |
+static const int true_len = sizeof("true") - 1; |
+static const int false_len = sizeof("false") - 1; |
+static const int null_len = sizeof("null") - 1; |
+ |
+// Returns true if c is an ASCII letter, an underscore, or a dollar sign. |
+// These are the characters allowed to start a bare (unquoted) key. |
+inline bool IsLetter(char c) { |
+  if (c == '_' || c == '$') return true; |
+  const bool is_lower = c >= 'a' && c <= 'z'; |
+  const bool is_upper = c >= 'A' && c <= 'Z'; |
+  return is_lower || is_upper; |
+} |
+ |
+// Returns true if c is accepted by IsLetter() or is an ASCII decimal digit. |
+inline bool IsAlphanumeric(char c) { |
+  const bool is_digit = c >= '0' && c <= '9'; |
+  return is_digit || IsLetter(c); |
+} |
+ |
+// Splits a leading bare key off *input. If *input starts with a character |
+// accepted by IsLetter(), the longest prefix consisting only of |
+// IsAlphanumeric() characters is stored in *key, that prefix is removed from |
+// *input, and true is returned. Otherwise returns false and leaves both |
+// arguments untouched. |
+static bool ConsumeKey(StringPiece* input, StringPiece* key) { |
+  if (input->empty()) return false; |
+  if (!IsLetter((*input)[0])) return false; |
+  // The first character is already known to be valid, so scan from index 1. |
+  int key_len = 1; |
+  while (key_len < input->size() && IsAlphanumeric((*input)[key_len])) { |
+    ++key_len; |
+  } |
+  const char* const start = input->data(); |
+  *key = StringPiece(start, key_len); |
+  *input = StringPiece(start + key_len, input->size() - key_len); |
+  return true; |
+} |
+ |
+// Returns true if input is non-empty and begins with a character accepted by |
+// IsLetter(), i.e. it could be the start of a bare key. |
+static bool MatchKey(StringPiece input) { |
+  if (input.empty()) return false; |
+  return IsLetter(input[0]); |
+} |
+ |
+// Creates a parser that reports what it parses to ow. All buffers start |
+// empty, no string is open (string_open_ == 0), finishing mode is off, and |
+// UTF-8 coercion is off. |
+JsonStreamParser::JsonStreamParser(ObjectWriter* ow) |
+    : ow_(ow), |
+      stack_(), |
+      leftover_(), |
+      json_(), |
+      p_(), |
+      key_(), |
+      key_storage_(), |
+      finishing_(false), |
+      parsed_(), |
+      parsed_storage_(), |
+      string_open_(0), |
+      chunk_storage_(), |
+      coerce_to_utf8_(false) { |
+  // Initialize the stack with a single value to be parsed. |
+  stack_.push(VALUE); |
+} |
+ |
+// Empty destructor body: no explicit cleanup is performed here (in |
+// particular, ow_ is not deleted). |
+JsonStreamParser::~JsonStreamParser() {} |
+ |
+ |
+// Feeds the next piece of JSON text to the parser. Bytes held back by an |
+// earlier call (leftover_) are logically prepended to json. Only the |
+// structurally valid UTF-8 prefix of the combined text is passed to |
+// ParseChunk(); whatever follows that prefix is kept in leftover_ for a later |
+// Parse() or FinishParse() call. Returns the status of ParseChunk(), or OK |
+// when there was no valid prefix and everything was stashed. |
+util::Status JsonStreamParser::Parse(StringPiece json) { |
+  StringPiece chunk = json; |
+  // If we have leftovers from a previous chunk, append the new chunk to it |
+  // and create a new StringPiece pointing at the string's data. This could |
+  // be large but we rely on the chunks to be small, assuming they are |
+  // fragments of a Cord. |
+  if (!leftover_.empty()) { |
+    // Don't point chunk to leftover_ because leftover_ will be updated in |
+    // ParseChunk(chunk). |
+    // Note: after the swap leftover_ holds whatever chunk_storage_ contained |
+    // before; ParseChunk() replaces or clears it only on its success path. |
+    chunk_storage_.swap(leftover_); |
+    json.AppendToString(&chunk_storage_); |
+    chunk = StringPiece(chunk_storage_); |
+  } |
+ |
+  // Find the structurally valid UTF8 prefix and parse only that. |
+  int n = internal::UTF8SpnStructurallyValid(chunk); |
+  if (n > 0) { |
+    util::Status status = ParseChunk(chunk.substr(0, n)); |
+ |
+    // Any leftover characters are stashed in leftover_ for later parsing when |
+    // there is more data available. |
+    // This appends after whatever unparsed tail ParseChunk() stored, so the |
+    // bytes stay in their original order. |
+    chunk.substr(n).AppendToString(&leftover_); |
+    return status; |
+  } else { |
+    // No valid prefix at all: keep the whole chunk for later. |
+    chunk.CopyToString(&leftover_); |
+    return util::Status::OK; |
+  } |
+} |
+ |
+// Completes the parse once no more input will arrive. Whatever is still held |
+// in leftover_ is parsed with finishing_ set, so conditions that would |
+// otherwise be deferred until more data arrives are reported as errors. |
+// Returns OK if nothing was pending or the remaining input was consumed |
+// completely; otherwise returns an error status. |
+util::Status JsonStreamParser::FinishParse() { |
+  // If we do not expect anything and there is nothing left to parse we're all |
+  // done. |
+  if (stack_.empty() && leftover_.empty()) { |
+    return util::Status::OK; |
+  } |
+ |
+  // Storage for UTF8-coerced string. |
+  google::protobuf::scoped_array<char> utf8; |
+  if (coerce_to_utf8_) { |
+    utf8.reset(new char[leftover_.size()]); |
+    // NOTE(review): presumably this rewrites invalid bytes as ' ' (using utf8 |
+    // as scratch space) and returns the buffer to read from -- confirm |
+    // against UTF8CoerceToStructurallyValid. |
+    char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' '); |
+    p_ = json_ = StringPiece(coerced, leftover_.size()); |
+  } else { |
+    // p_ and json_ are set before validating because ReportFailure() reads |
+    // both to build the error context. |
+    p_ = json_ = leftover_; |
+    if (!internal::IsStructurallyValidUTF8(leftover_)) { |
+      return ReportFailure("Encountered non UTF-8 code points."); |
+    } |
+  } |
+ |
+  // Parse the remainder in finishing mode, which reports errors for things like |
+  // unterminated strings or unknown tokens that would normally be retried. |
+  finishing_ = true; |
+  util::Status result = RunParser(); |
+  if (result.ok()) { |
+    // Trailing whitespace is fine; anything else left over is an error. |
+    SkipWhitespace(); |
+    if (!p_.empty()) { |
+      result = ReportFailure("Parsing terminated before end of input."); |
+    } |
+  } |
+  return result; |
+} |
+ |
+// Parses one chunk in non-finishing mode and records any unparsed tail in |
+// leftover_. An error from RunParser() is returned unchanged; an |
+// INVALID_ARGUMENT error is returned if non-whitespace text remains although |
+// the stack is empty (nothing more is expected). Otherwise returns OK. |
+util::Status JsonStreamParser::ParseChunk(StringPiece chunk) { |
+  // Do not do any work if the chunk is empty. |
+  if (chunk.empty()) return util::Status::OK; |
+ |
+  // json_ keeps the start of the chunk (used for error context); p_ is the |
+  // moving cursor. |
+  p_ = json_ = chunk; |
+ |
+  finishing_ = false; |
+  util::Status result = RunParser(); |
+  if (!result.ok()) return result; |
+ |
+  SkipWhitespace(); |
+  if (p_.empty()) { |
+    // If we parsed everything we had, clear the leftover. |
+    leftover_.clear(); |
+  } else { |
+    // If we do not expect anything i.e. stack is empty, and we have non-empty |
+    // string left to parse, we report an error. |
+    if (stack_.empty()) { |
+      return ReportFailure("Parsing terminated before end of input."); |
+    } |
+    // If we expect future data i.e. stack is non-empty, and we have some |
+    // unparsed data left, we save it for later parse. |
+    leftover_ = p_.ToString(); |
+  } |
+  return util::Status::OK; |
+} |
+ |
+// Drives the parse: repeatedly pops the expected ParseType off stack_, |
+// classifies the next token, and dispatches to the matching Parse* handler |
+// (handlers push follow-up expectations back onto the stack). Stops when the |
+// stack is empty or a handler returns a non-OK status. A CANCELLED result |
+// while not finishing means more input is needed: the popped type is pushed |
+// back, a pending key is copied into key_storage_, and OK is returned so that |
+// parsing can resume with the next chunk. |
+util::Status JsonStreamParser::RunParser() { |
+  while (!stack_.empty()) { |
+    ParseType type = stack_.top(); |
+    // A non-zero string_open_ means a string was left unfinished, so skip |
+    // token detection and continue as a string. |
+    TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING; |
+    stack_.pop(); |
+    util::Status result; |
+    switch (type) { |
+      case VALUE: |
+        result = ParseValue(t); |
+        break; |
+ |
+      case OBJ_MID: |
+        result = ParseObjectMid(t); |
+        break; |
+ |
+      case ENTRY: |
+        result = ParseEntry(t); |
+        break; |
+ |
+      case ENTRY_MID: |
+        result = ParseEntryMid(t); |
+        break; |
+ |
+      case ARRAY_VALUE: |
+        result = ParseArrayValue(t); |
+        break; |
+ |
+      case ARRAY_MID: |
+        result = ParseArrayMid(t); |
+        break; |
+ |
+      default: |
+        result = util::Status(util::error::INTERNAL, |
+                              StrCat("Unknown parse type: ", type)); |
+        break; |
+    } |
+    if (!result.ok()) { |
+      // If we were cancelled, save our state and try again later. |
+      if (!finishing_ && result == util::Status::CANCELLED) { |
+        stack_.push(type); |
+        // If we have a key we still need to render, make sure to save off the |
+        // contents in our own storage. |
+        if (!key_.empty() && key_storage_.empty()) { |
+          key_.AppendToString(&key_storage_); |
+          key_ = StringPiece(key_storage_); |
+        } |
+        result = util::Status::OK; |
+      } |
+      return result; |
+    } |
+  } |
+  return util::Status::OK; |
+} |
+ |
+// Parses a single JSON value whose first token has been classified as type, |
+// dispatching to the handler for that kind of value. Tokens that cannot |
+// start a value yield CANCELLED (when more input may complete them) or an |
+// error. |
+util::Status JsonStreamParser::ParseValue(TokenType type) { |
+  switch (type) { |
+    case BEGIN_OBJECT: |
+      return HandleBeginObject(); |
+    case BEGIN_ARRAY: |
+      return HandleBeginArray(); |
+    case BEGIN_STRING: |
+      return ParseString(); |
+    case BEGIN_NUMBER: |
+      return ParseNumber(); |
+    case BEGIN_TRUE: |
+      return ParseTrue(); |
+    case BEGIN_FALSE: |
+      return ParseFalse(); |
+    case BEGIN_NULL: |
+      return ParseNull(); |
+    case UNKNOWN: |
+      return ReportUnknown("Expected a value."); |
+    default: |
+      break; |
+  } |
+  // Any other token. If we are finishing, or enough input remains that a |
+  // truncated literal such as 'fals' cannot be the explanation, this is a |
+  // genuine error. |
+  if (finishing_ || p_.length() >= false_len) { |
+    return ReportFailure("Unexpected token."); |
+  } |
+  // Otherwise we may simply have been cut off mid-literal; wait for more |
+  // data before deciding. |
+  return util::Status::CANCELLED; |
+} |
+ |
+// Parses a string value. On success the string is rendered to the writer |
+// under the pending key, and the key and parsed-string buffers are reset. |
+// On failure (including CANCELLED) the state is left for a later retry. |
+util::Status JsonStreamParser::ParseString() { |
+  const util::Status status = ParseStringHelper(); |
+  if (!status.ok()) return status; |
+ |
+  ow_->RenderString(key_, parsed_); |
+  key_.clear(); |
+  parsed_.clear(); |
+  parsed_storage_.clear(); |
+  return status; |
+} |
+ |
+// Parses a quoted string starting at p_ (or continues one left open by an |
+// earlier call, when string_open_ is non-zero) and leaves the unescaped |
+// contents in parsed_. If nothing had to be copied, parsed_ points directly |
+// into the input; otherwise it points at parsed_storage_. The string may be |
+// delimited by either single or double quotes, and must be closed by the |
+// same character that opened it. |
+// |
+// Returns OK with p_ advanced past the closing quote; CANCELLED if the input |
+// ran out and more data may arrive (string_open_ stays set and the text read |
+// so far is kept in parsed_storage_); or an error when finishing. |
+util::Status JsonStreamParser::ParseStringHelper() { |
+  // If we haven't seen the start quote, grab it and remember it for later. |
+  if (string_open_ == 0) { |
+    string_open_ = *p_.data(); |
+    GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\''); |
+    Advance(); |
+  } |
+  // Track where we last copied data from so we can minimize copying. |
+  const char* last = p_.data(); |
+  while (!p_.empty()) { |
+    const char* data = p_.data(); |
+    if (*data == '\\') { |
+      // We're about to handle an escape, copy all bytes from last to data. |
+      if (last < data) { |
+        parsed_storage_.append(last, data - last); |
+        last = data; |
+      } |
+      // If we ran out of string after the \, cancel or report an error |
+      // depending on if we expect more data later. |
+      if (p_.length() == 1) { |
+        if (!finishing_) { |
+          return util::Status::CANCELLED; |
+        } |
+        return ReportFailure("Closing quote expected in string."); |
+      } |
+      // Parse a unicode escape if we found \u in the string. |
+      if (data[1] == 'u') { |
+        util::Status result = ParseUnicodeEscape(); |
+        if (!result.ok()) { |
+          return result; |
+        } |
+        // Move last pointer past the unicode escape and continue. |
+        last = p_.data(); |
+        continue; |
+      } |
+      // Handle the standard set of backslash-escaped characters. |
+      switch (data[1]) { |
+        case 'b': |
+          parsed_storage_.push_back('\b'); |
+          break; |
+        case 'f': |
+          parsed_storage_.push_back('\f'); |
+          break; |
+        case 'n': |
+          parsed_storage_.push_back('\n'); |
+          break; |
+        case 'r': |
+          parsed_storage_.push_back('\r'); |
+          break; |
+        case 't': |
+          parsed_storage_.push_back('\t'); |
+          break; |
+        case 'v': |
+          parsed_storage_.push_back('\v'); |
+          break; |
+        default: |
+          // Any other escaped character (quotes, backslash, '/', ...) is |
+          // taken literally. |
+          parsed_storage_.push_back(data[1]); |
+      } |
+      // We handled two characters, so advance past them and continue. |
+      p_.remove_prefix(2); |
+      last = p_.data(); |
+      continue; |
+    } |
+    // If we found the closing quote note it, advance past it, and return. |
+    if (*data == string_open_) { |
+      // If we didn't copy anything, reuse the input buffer. |
+      if (parsed_storage_.empty()) { |
+        parsed_ = StringPiece(last, data - last); |
+      } else { |
+        if (last < data) { |
+          parsed_storage_.append(last, data - last); |
+          last = data; |
+        } |
+        parsed_ = StringPiece(parsed_storage_); |
+      } |
+      // Clear the quote char so next time we try to parse a string we'll |
+      // start fresh. |
+      string_open_ = 0; |
+      Advance(); |
+      return util::Status::OK; |
+    } |
+    // Normal character, just advance past it. |
+    Advance(); |
+  } |
+  // If we ran out of characters, copy over what we have so far. |
+  if (last < p_.data()) { |
+    parsed_storage_.append(last, p_.data() - last); |
+  } |
+  // If we didn't find the closing quote but we expect more data, cancel for now |
+  if (!finishing_) { |
+    return util::Status::CANCELLED; |
+  } |
+  // End of string reached without a closing quote, report an error. |
+  string_open_ = 0; |
+  return ReportFailure("Closing quote expected in string."); |
+} |
+ |
+// Decodes the \uhhhh escape at the front of p_: the four hex digits are |
+// converted to a numeric code value, that value is encoded as UTF-8 and |
+// appended to parsed_storage_, and p_ is advanced past the escape. |
+// |
+// Returns CANCELLED if fewer than kUnicodeEscapedLength characters remain |
+// and more input may arrive, an error if the escape is truncated at end of |
+// input or contains a non-hex digit, and OK otherwise. Each escape is |
+// encoded on its own; two consecutive escapes forming a UTF-16 surrogate |
+// pair are not combined. |
+// |
+// There are some security exploits with UTF-8 that we should be careful of: |
+//   - http://www.unicode.org/reports/tr36/#UTF-8_Exploit |
+//   - http://sites/intl-eng/design-guide/core-application |
+util::Status JsonStreamParser::ParseUnicodeEscape() { |
+  if (p_.length() < kUnicodeEscapedLength) { |
+    if (!finishing_) { |
+      return util::Status::CANCELLED; |
+    } |
+    return ReportFailure("Illegal hex string."); |
+  } |
+  GOOGLE_DCHECK_EQ('\\', p_.data()[0]); |
+  GOOGLE_DCHECK_EQ('u', p_.data()[1]); |
+  uint32 code = 0; |
+  for (int i = 2; i < kUnicodeEscapedLength; ++i) { |
+    // The <cctype> classifiers have undefined behavior for negative |
+    // arguments other than EOF, and plain char may be signed (any UTF-8 byte |
+    // >= 0x80 would then be negative), so classify via unsigned char. |
+    const unsigned char digit = static_cast<unsigned char>(p_.data()[i]); |
+    if (!isxdigit(digit)) { |
+      return ReportFailure("Invalid escape sequence."); |
+    } |
+    code = (code << 4) + hex_digit_to_int(p_.data()[i]); |
+  } |
+  char buf[UTFmax]; |
+  int len = EncodeAsUTF8Char(code, buf); |
+  // Advance past the unicode escape. |
+  p_.remove_prefix(kUnicodeEscapedLength); |
+  parsed_storage_.append(buf, len); |
+  return util::Status::OK; |
+} |
+ |
+// Parses a number value and renders it to the writer under the pending key |
+// as a double, int64, or uint64 depending on what ParseNumberHelper() |
+// produced. The key is cleared once the value has been rendered. |
+util::Status JsonStreamParser::ParseNumber() { |
+  NumberResult number; |
+  const util::Status status = ParseNumberHelper(&number); |
+  if (!status.ok()) return status; |
+ |
+  if (number.type == NumberResult::DOUBLE) { |
+    ow_->RenderDouble(key_, number.double_val); |
+  } else if (number.type == NumberResult::INT) { |
+    ow_->RenderInt64(key_, number.int_val); |
+  } else if (number.type == NumberResult::UINT) { |
+    ow_->RenderUint64(key_, number.uint_val); |
+  } else { |
+    // The helper reported success but produced a type we do not know. |
+    return ReportFailure("Unable to parse number."); |
+  } |
+  key_.clear(); |
+  return status; |
+} |
+ |
+// Scans the number at the front of p_ and converts it without rendering it. |
+// On success *result holds the value (DOUBLE if the text contains '.', 'e' |
+// or 'E'; otherwise INT if it starts with '-', else UINT) and p_ is advanced |
+// past the number. Returns CANCELLED if the number runs to the very end of |
+// the input while more data may still arrive, or an INVALID_ARGUMENT error |
+// if the text cannot be converted. |
+util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { |
+  const char* data = p_.data(); |
+  int length = p_.length(); |
+ |
+  // Look for the first non-numeric character, or the end of the string. |
+  int index = 0; |
+  bool floating = false; |
+  bool negative = data[index] == '-'; |
+  // Find the first character that cannot be part of the number. Along the way |
+  // detect if the number needs to be parsed as a double. |
+  // Note that this restricts numbers to the JSON specification, so for example |
+  // we do not support hex or octal notations. |
+  for (; index < length; ++index) { |
+    char c = data[index]; |
+    // Compare against the digit range directly rather than calling |
+    // isdigit(): passing a negative char (e.g. a UTF-8 byte >= 0x80 on |
+    // platforms where char is signed) to a <cctype> function is undefined |
+    // behavior. |
+    if ('0' <= c && c <= '9') continue; |
+    if (c == '.' || c == 'e' || c == 'E') { |
+      floating = true; |
+      continue; |
+    } |
+    if (c == '+' || c == '-') continue; |
+    // Not a valid number character, break out. |
+    break; |
+  } |
+ |
+  // If the entire input is a valid number, and we may have more content in the |
+  // future, we abort for now and resume when we know more. |
+  if (index == length && !finishing_) { |
+    return util::Status::CANCELLED; |
+  } |
+ |
+  // Create a string containing just the number, so we can use safe_strtoX |
+  string number = p_.substr(0, index).ToString(); |
+ |
+  // Floating point number, parse as a double. |
+  if (floating) { |
+    if (!safe_strtod(number, &result->double_val)) { |
+      return ReportFailure("Unable to parse number."); |
+    } |
+    result->type = NumberResult::DOUBLE; |
+    p_.remove_prefix(index); |
+    return util::Status::OK; |
+  } |
+ |
+  // Positive non-floating point number, parse as a uint64. |
+  if (!negative) { |
+    if (!safe_strtou64(number, &result->uint_val)) { |
+      return ReportFailure("Unable to parse number."); |
+    } |
+    result->type = NumberResult::UINT; |
+    p_.remove_prefix(index); |
+    return util::Status::OK; |
+  } |
+ |
+  // Negative non-floating point number, parse as an int64. |
+  if (!safe_strto64(number, &result->int_val)) { |
+    return ReportFailure("Unable to parse number."); |
+  } |
+  result->type = NumberResult::INT; |
+  p_.remove_prefix(index); |
+  return util::Status::OK; |
+} |
+ |
+// Handles an opening '{': consumes it, tells the writer an object starts |
+// under the pending key, and schedules parsing of the first entry. |
+util::Status JsonStreamParser::HandleBeginObject() { |
+  GOOGLE_DCHECK_EQ('{', *p_.data()); |
+  Advance(); |
+  ow_->StartObject(key_); |
+  // The next thing expected inside the object is an entry (or '}'). |
+  stack_.push(ENTRY); |
+  // The key has been handed to the writer, so it is no longer pending. |
+  key_.clear(); |
+  return util::Status::OK; |
+} |
+ |
+// Parses what follows a completed key:value pair inside an object: either a |
+// '}' that closes the object or a ',' that introduces another entry. |
+util::Status JsonStreamParser::ParseObjectMid(TokenType type) { |
+  switch (type) { |
+    case UNKNOWN: |
+      return ReportUnknown("Expected , or } after key:value pair."); |
+ |
+    case END_OBJECT: |
+      // Object is complete: advance past the closing brace and tell the |
+      // writer the object has ended. |
+      Advance(); |
+      ow_->EndObject(); |
+      return util::Status::OK; |
+ |
+    case VALUE_SEPARATOR: |
+      // Found a comma: advance past it and expect another entry. |
+      Advance(); |
+      stack_.push(ENTRY); |
+      return util::Status::OK; |
+ |
+    default: |
+      // Illegal token after key:value pair. |
+      return ReportFailure("Expected , or } after key:value pair."); |
+  } |
+} |
+ |
+// Parses the start of an object entry: either the key of a key:value pair |
+// (a quoted string or a bare key) or a '}' closing the object. On success |
+// with a key, key_ holds the key and the stack is set up to expect ':' and |
+// then ',' or '}'. Returns the status of the key parse, which may be |
+// CANCELLED when more input is needed. |
+util::Status JsonStreamParser::ParseEntry(TokenType type) { |
+  if (type == UNKNOWN) { |
+    return ReportUnknown("Expected an object key or }."); |
+  } |
+ |
+  // Close the object and return. This allows for trailing commas. |
+  if (type == END_OBJECT) { |
+    ow_->EndObject(); |
+    Advance(); |
+    return util::Status::OK; |
+  } |
+ |
+  util::Status result; |
+  if (type == BEGIN_STRING) { |
+    // Key is a string (standard JSON), parse it and store the string. |
+    result = ParseStringHelper(); |
+    if (result.ok()) { |
+      key_storage_.clear(); |
+      if (!parsed_storage_.empty()) { |
+        // The key text lives in parsed_storage_; take over that buffer so the |
+        // key stays valid, leaving parsed_storage_ empty for the value. |
+        parsed_storage_.swap(key_storage_); |
+        key_ = StringPiece(key_storage_); |
+      } else { |
+        // Nothing was copied, so parsed_ points into the input buffer. |
+        key_ = parsed_; |
+      } |
+      parsed_.clear(); |
+    } |
+  } else if (type == BEGIN_KEY) { |
+    // Key is a bare key (back compat), create a StringPiece pointing to it. |
+    result = ParseKey(); |
+  } else { |
+    // Unknown key type, report an error. |
+    result = ReportFailure("Expected an object key or }."); |
+  } |
+  // On success we next expect an entry mid ':' then an object mid ',' or '}' |
+  if (result.ok()) { |
+    // Pushed in reverse order: ENTRY_MID ends up on top and is handled first. |
+    stack_.push(OBJ_MID); |
+    stack_.push(ENTRY_MID); |
+  } |
+  return result; |
+} |
+ |
+// Parses the ':' that separates a key from its value and schedules parsing |
+// of the value. |
+util::Status JsonStreamParser::ParseEntryMid(TokenType type) { |
+  switch (type) { |
+    case UNKNOWN: |
+      return ReportUnknown("Expected : between key:value pair."); |
+ |
+    case ENTRY_SEPARATOR: |
+      // Consume the ':' and expect the value next. |
+      Advance(); |
+      stack_.push(VALUE); |
+      return util::Status::OK; |
+ |
+    default: |
+      return ReportFailure("Expected : between key:value pair."); |
+  } |
+} |
+ |
+// Handles an opening '[': consumes it, tells the writer a list starts under |
+// the pending key, and schedules parsing of the first array value. |
+util::Status JsonStreamParser::HandleBeginArray() { |
+  GOOGLE_DCHECK_EQ('[', *p_.data()); |
+  Advance(); |
+  ow_->StartList(key_); |
+  // The next thing expected inside the array is a value (or ']'). |
+  stack_.push(ARRAY_VALUE); |
+  // The key has been handed to the writer, so it is no longer pending. |
+  key_.clear(); |
+  return util::Status::OK; |
+} |
+ |
+// Parses one element of an array, or the ']' that closes it (which also |
+// covers empty arrays and a trailing comma). |
+util::Status JsonStreamParser::ParseArrayValue(TokenType type) { |
+  switch (type) { |
+    case UNKNOWN: |
+      return ReportUnknown("Expected a value or ] within an array."); |
+ |
+    case END_ARRAY: |
+      ow_->EndList(); |
+      Advance(); |
+      return util::Status::OK; |
+ |
+    default: |
+      break; |
+  } |
+ |
+  // ARRAY_MID has to sit below anything ParseValue() pushes onto the stack, |
+  // so it goes on before the value is parsed. |
+  stack_.push(ARRAY_MID); |
+  const util::Status status = ParseValue(type); |
+  if (status == util::Status::CANCELLED) { |
+    // The value will be retried later, at which point ARRAY_MID is pushed |
+    // again; remove it now so it does not end up on the stack twice. |
+    stack_.pop(); |
+  } |
+  return status; |
+} |
+ |
+// Parses what follows an array element: either a ']' that closes the array |
+// or a ',' that introduces another element. |
+util::Status JsonStreamParser::ParseArrayMid(TokenType type) { |
+  switch (type) { |
+    case UNKNOWN: |
+      return ReportUnknown("Expected , or ] after array value."); |
+ |
+    case END_ARRAY: |
+      ow_->EndList(); |
+      Advance(); |
+      return util::Status::OK; |
+ |
+    case VALUE_SEPARATOR: |
+      // Found a comma: advance past it and expect an array value next. |
+      Advance(); |
+      stack_.push(ARRAY_VALUE); |
+      return util::Status::OK; |
+ |
+    default: |
+      // Illegal token after array value. |
+      return ReportFailure("Expected , or ] after array value."); |
+  } |
+} |
+ |
+// Renders boolean true under the pending key and consumes the 'true' |
+// literal. The caller has already verified that the literal is present. |
+util::Status JsonStreamParser::ParseTrue() { |
+  ow_->RenderBool(key_, true); |
+  p_.remove_prefix(true_len); |
+  key_.clear(); |
+  return util::Status::OK; |
+} |
+ |
+// Renders boolean false under the pending key and consumes the 'false' |
+// literal. The caller has already verified that the literal is present. |
+util::Status JsonStreamParser::ParseFalse() { |
+  ow_->RenderBool(key_, false); |
+  p_.remove_prefix(false_len); |
+  key_.clear(); |
+  return util::Status::OK; |
+} |
+ |
+// Renders a null under the pending key and consumes the 'null' literal. |
+// The caller has already verified that the literal is present. |
+util::Status JsonStreamParser::ParseNull() { |
+  ow_->RenderNull(key_); |
+  p_.remove_prefix(null_len); |
+  key_.clear(); |
+  return util::Status::OK; |
+} |
+ |
+// Builds an INVALID_ARGUMENT status whose text is message, followed by a |
+// line with up to kContextLength bytes of input on either side of the |
+// current position (p_), followed by a line with a '^' under that position. |
+util::Status JsonStreamParser::ReportFailure(StringPiece message) { |
+  static const int kContextLength = 20; |
+  const char* p_start = p_.data(); |
+  const char* json_start = json_.data(); |
+  const char* json_end = json_start + json_.size(); |
+  // Clamp by comparing distances instead of applying max()/min() to |
+  // speculative pointers: merely computing p_start - kContextLength or |
+  // p_start + kContextLength is undefined behavior when the result lands |
+  // outside the buffer. |
+  const char* begin = json_start; |
+  if (p_start - json_start > kContextLength) { |
+    begin = p_start - kContextLength; |
+  } |
+  const char* end = json_end; |
+  if (json_end - p_start > kContextLength) { |
+    end = p_start + kContextLength; |
+  } |
+  StringPiece segment(begin, end - begin); |
+  // Pad with spaces so the caret lines up with the failing position. |
+  string location(p_start - begin, ' '); |
+  location.push_back('^'); |
+  return util::Status(util::error::INVALID_ARGUMENT, |
+                      StrCat(message, "\n", segment, "\n", location)); |
+} |
+ |
+// Handles a token that could not be classified. While more input may still |
+// arrive this returns CANCELLED so the parse is retried later; in finishing |
+// mode it becomes a failure, with a note when the input simply ended. |
+util::Status JsonStreamParser::ReportUnknown(StringPiece message) { |
+  if (!finishing_) { |
+    return util::Status::CANCELLED; |
+  } |
+  if (!p_.empty()) { |
+    return ReportFailure(message); |
+  } |
+  return ReportFailure(StrCat("Unexpected end of string. ", message)); |
+} |
+ |
+// Advances p_ past any leading ASCII whitespace. |
+void JsonStreamParser::SkipWhitespace() { |
+  for (;;) { |
+    if (p_.empty()) return; |
+    if (!ascii_isspace(*p_.data())) return; |
+    Advance(); |
+  } |
+} |
+ |
+// Moves p_ forward by one UTF-8 character, never stepping past the end of |
+// the remaining input. |
+void JsonStreamParser::Advance() { |
+  const int remaining = p_.length(); |
+  const int char_bytes = UTF8FirstLetterNumBytes(p_.data(), p_.length()); |
+  p_.remove_prefix(min<int>(remaining, char_bytes)); |
+} |
+ |
+// Parses a bare (unquoted) object key at the front of p_ into key_, which |
+// then points directly into the input. Returns CANCELLED, with p_ restored, |
+// if the key runs to the end of the input while more data may still arrive. |
+util::Status JsonStreamParser::ParseKey() { |
+  const StringPiece saved = p_; |
+  if (!ConsumeKey(&p_, &key_)) { |
+    return ReportFailure("Invalid key or variable name."); |
+  } |
+  // A key that consumed all remaining input might continue in the next |
+  // chunk, so rewind and wait unless this is the final pass. |
+  if (p_.empty() && !finishing_) { |
+    p_ = saved; |
+    return util::Status::CANCELLED; |
+  } |
+  // The key refers to the input buffer, so the separate key storage is not |
+  // needed. |
+  key_storage_.clear(); |
+  return util::Status::OK; |
+} |
+ |
+// Skips leading whitespace and classifies the token at the front of p_ |
+// without consuming it. Returns UNKNOWN when the input is exhausted or the |
+// token cannot be recognized from what is available. |
+JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() { |
+  SkipWhitespace(); |
+ |
+  const int remaining = p_.size(); |
+  if (remaining == 0) { |
+    // Out of data: the caller puts the previous parse type back on the stack |
+    // and tries again once more data is available. |
+    return UNKNOWN; |
+  } |
+  // TODO(sven): Split this method based on context since different contexts |
+  // support different tokens. Would slightly speed up processing? |
+  const char* data = p_.data(); |
+  const char first = *data; |
+ |
+  // Tokens that are identified by their first character alone. |
+  switch (first) { |
+    case '\"': |
+    case '\'': |
+      return BEGIN_STRING; |
+    case '-': |
+      return BEGIN_NUMBER; |
+    case '{': |
+      return BEGIN_OBJECT; |
+    case '}': |
+      return END_OBJECT; |
+    case '[': |
+      return BEGIN_ARRAY; |
+    case ']': |
+      return END_ARRAY; |
+    case ':': |
+      return ENTRY_SEPARATOR; |
+    case ',': |
+      return VALUE_SEPARATOR; |
+    default: |
+      break; |
+  } |
+  if ('0' <= first && first <= '9') { |
+    return BEGIN_NUMBER; |
+  } |
+ |
+  // Literals need to be fully present to be recognized; they are checked |
+  // before bare keys, which would otherwise match them. |
+  if (remaining >= true_len && strncmp(data, "true", true_len) == 0) { |
+    return BEGIN_TRUE; |
+  } |
+  if (remaining >= false_len && strncmp(data, "false", false_len) == 0) { |
+    return BEGIN_FALSE; |
+  } |
+  if (remaining >= null_len && strncmp(data, "null", null_len) == 0) { |
+    return BEGIN_NULL; |
+  } |
+ |
+  // Either a bare key, or something we can't parse yet. The latter is not |
+  // necessarily an invalid token, so no error is reported here: UNKNOWN lets |
+  // the caller try again later with more data, or fail when finishing with |
+  // leftovers. |
+  return MatchKey(p_) ? BEGIN_KEY : UNKNOWN; |
+} |
+ |
+} // namespace converter |
+} // namespace util |
+} // namespace protobuf |
+} // namespace google |