| Index: third_party/protobuf/src/google/protobuf/io/tokenizer.cc
|
| diff --git a/third_party/protobuf/src/google/protobuf/io/tokenizer.cc b/third_party/protobuf/src/google/protobuf/io/tokenizer.cc
|
| index a022b71db42ff9a6fa8500ab5b89583eff284480..3d57707c127f021d390d23db3e0001cb728e840c 100644
|
| --- a/third_party/protobuf/src/google/protobuf/io/tokenizer.cc
|
| +++ b/third_party/protobuf/src/google/protobuf/io/tokenizer.cc
|
| @@ -1,6 +1,6 @@
|
| // Protocol Buffers - Google's data interchange format
|
| // Copyright 2008 Google Inc. All rights reserved.
|
| -// http://code.google.com/p/protobuf/
|
| +// https://developers.google.com/protocol-buffers/
|
| //
|
| // Redistribution and use in source and binary forms, with or without
|
| // modification, are permitted provided that the following conditions are
|
| @@ -90,7 +90,9 @@
|
|
|
| #include <google/protobuf/io/tokenizer.h>
|
| #include <google/protobuf/stubs/common.h>
|
| +#include <google/protobuf/stubs/logging.h>
|
| #include <google/protobuf/stubs/stringprintf.h>
|
| +#include <google/protobuf/io/strtod.h>
|
| #include <google/protobuf/io/zero_copy_stream.h>
|
| #include <google/protobuf/stubs/strutil.h>
|
| #include <google/protobuf/stubs/stl_util.h>
|
| @@ -195,7 +197,9 @@ Tokenizer::Tokenizer(ZeroCopyInputStream* input,
|
| record_target_(NULL),
|
| record_start_(-1),
|
| allow_f_after_float_(false),
|
| - comment_style_(CPP_COMMENT_STYLE) {
|
| + comment_style_(CPP_COMMENT_STYLE),
|
| + require_space_after_number_(true),
|
| + allow_multiline_strings_(false) {
|
|
|
| current_.line = 0;
|
| current_.column = 0;
|
| @@ -350,9 +354,16 @@ void Tokenizer::ConsumeString(char delimiter) {
|
| while (true) {
|
| switch (current_char_) {
|
| case '\0':
|
| - case '\n': {
|
| - AddError("String literals cannot cross line boundaries.");
|
| + AddError("Unexpected end of string.");
|
| return;
|
| +
|
| + case '\n': {
|
| + if (!allow_multiline_strings_) {
|
| + AddError("String literals cannot cross line boundaries.");
|
| + return;
|
| + }
|
| + NextChar();
|
| + break;
|
| }
|
|
|
| case '\\': {
|
| @@ -364,7 +375,7 @@ void Tokenizer::ConsumeString(char delimiter) {
|
| // Possibly followed by two more octal digits, but these will
|
| // just be consumed by the main loop anyway so we don't need
|
| // to do so explicitly here.
|
| - } else if (TryConsume('x') || TryConsume('X')) {
|
| + } else if (TryConsume('x')) {
|
| if (!TryConsumeOne<HexDigit>()) {
|
| AddError("Expected hex digits for escape sequence.");
|
| }
|
| @@ -449,7 +460,7 @@ Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
|
| }
|
| }
|
|
|
| - if (LookingAt<Letter>()) {
|
| + if (LookingAt<Letter>() && require_space_after_number_) {
|
| AddError("Need space between number and identifier.");
|
| } else if (current_char_ == '.') {
|
| if (is_float) {
|
| @@ -618,6 +629,12 @@ bool Tokenizer::Next() {
|
| ConsumeString('\'');
|
| current_.type = TYPE_STRING;
|
| } else {
|
| + // Check if the high order bit is set.
|
| + if (current_char_ & 0x80) {
|
| + error_collector_->AddError(line_, column_,
|
| + StringPrintf("Interpreting non ascii codepoint %d.",
|
| + static_cast<unsigned char>(current_char_)));
|
| + }
|
| NextChar();
|
| current_.type = TYPE_SYMBOL;
|
| }
|
| @@ -746,6 +763,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments,
|
| next_leading_comments);
|
|
|
| if (current_.type == TYPE_START) {
|
| + // Ignore unicode byte order mark(BOM) if it appears at the file
|
| + // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
|
| + if (TryConsume((char)0xEF)) {
|
| + if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
|
| + AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
|
| + "Only UTF-8 is accepted for proto file.");
|
| + return false;
|
| + }
|
| + }
|
| collector.DetachFromPrev();
|
| } else {
|
| // A comment appearing on the same line must be attached to the previous
|
| @@ -1086,6 +1112,26 @@ void Tokenizer::ParseStringAppend(const string& text, string* output) {
|
| }
|
| }
|
|
|
| +template<typename CharacterClass>
|
| +static bool AllInClass(const string& s) {
|
| + for (int i = 0; i < s.size(); ++i) {
|
| + if (!CharacterClass::InClass(s[i]))
|
| + return false;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +bool Tokenizer::IsIdentifier(const string& text) {
|
| + // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
|
| + if (text.size() == 0)
|
| + return false;
|
| + if (!Letter::InClass(text.at(0)))
|
| + return false;
|
| + if (!AllInClass<Alphanumeric>(text.substr(1)))
|
| + return false;
|
| + return true;
|
| +}
|
| +
|
| } // namespace io
|
| } // namespace protobuf
|
| } // namespace google
|
|
|