third_party/protobuf/src/google/protobuf/stubs/strutil.cc - Issue 1322483002: Revert https://codereview.chromium.org/1291903002 (protobuf roll).

Unified Diff: third_party/protobuf/src/google/protobuf/stubs/strutil.cc

Issue 1322483002: Revert https://codereview.chromium.org/1291903002 (protobuf roll). (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/protobuf/src/google/protobuf/stubs/strutil.h ('k') | third_party/protobuf/src/google/protobuf/stubs/strutil_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/protobuf/src/google/protobuf/stubs/strutil.cc

diff --git a/third_party/protobuf/src/google/protobuf/stubs/strutil.cc b/third_party/protobuf/src/google/protobuf/stubs/strutil.cc

index 8442f2cecbf4c552e43bfe36a104147269ae384e..00d1bc633d58d0952afa5a984062bae5f5873d64 100644

--- a/third_party/protobuf/src/google/protobuf/stubs/strutil.cc

+++ b/third_party/protobuf/src/google/protobuf/stubs/strutil.cc

@@ -1,6 +1,6 @@

// Protocol Buffers - Google's data interchange format

-// https://developers.google.com/protocol-buffers/

+// http://code.google.com/p/protobuf/

// Redistribution and use in source and binary forms, with or without

// modification, are permitted provided that the following conditions are

@@ -31,8 +31,6 @@

// from google3/strings/strutil.cc

#include <google/protobuf/stubs/strutil.h>

-#include <google/protobuf/stubs/mathlimits.h>

#include <errno.h>

#include <float.h> // FLT_DIG and DBL_DIG

#include <limits>

@@ -40,8 +38,6 @@

#include <stdio.h>

#include <iterator>

-#include <google/protobuf/stubs/stl_util.h>

#ifdef _WIN32

// MSVC has only _snprintf, not snprintf.

@@ -59,6 +55,11 @@

namespace google {

namespace protobuf {

+inline bool IsNaN(double value) {

+ // NaN is never equal to anything, even itself.

+ return value != value;

// These are defined as macros on some platforms. #undef them so that we can

// redefine them.

#undef isxdigit

@@ -93,34 +94,6 @@ void StripString(string* s, const char* remove, char replacewith) {

}

-void StripWhitespace(string* str) {

- int str_length = str->length();

- // Strip off leading whitespace.

- int first = 0;

- while (first < str_length && ascii_isspace(str->at(first))) {

- ++first;

- }

- // If entire string is white space.

- if (first == str_length) {

- str->clear();

- return;

- }

- if (first > 0) {

- str->erase(0, first);

- str_length -= first;

- }

- // Strip off trailing whitespace.

- int last = str_length - 1;

- while (last >= 0 && ascii_isspace(str->at(last))) {

- --last;

- }

- if (last != (str_length - 1) && last >= 0) {

- str->erase(last + 1, string::npos);

- }

// ----------------------------------------------------------------------

// StringReplace()

// Replace the "old" pattern with the "new" pattern in a string,

@@ -308,6 +281,17 @@ void JoinStrings(const vector<string>& components,

#define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))

+inline int hex_digit_to_int(char c) {

+ /* Assume ASCII. */

+ assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);

+ assert(isxdigit(c));

+ int x = static_cast<unsigned char>(c);

+ if (x > '9') {

+ x += 9;

+ }

+ return x & 0xf;

// Protocol buffers doesn't ever care about errors, but I don't want to remove

// the code.

#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)

@@ -612,133 +596,6 @@ uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) {

return static_cast<uint32>(result);

}

-inline bool safe_parse_sign(string* text /*inout*/,

- bool* negative_ptr /*output*/) {

- const char* start = text->data();

- const char* end = start + text->size();

- // Consume whitespace.

- while (start < end && (start[0] == ' ')) {

- ++start;

- }

- while (start < end && (end[-1] == ' ')) {

- --end;

- }

- if (start >= end) {

- return false;

- }

- // Consume sign.

- *negative_ptr = (start[0] == '-');

- if (*negative_ptr || start[0] == '+') {

- ++start;

- if (start >= end) {

- return false;

- }

- *text = text->substr(start - text->data(), end - start);

- return true;

-template<typename IntType>

-bool safe_parse_positive_int(

- string text, IntType* value_p) {

- int base = 10;

- IntType value = 0;

- const IntType vmax = std::numeric_limits<IntType>::max();

- assert(vmax > 0);

- assert(vmax >= base);

- const IntType vmax_over_base = vmax / base;

- const char* start = text.data();

- const char* end = start + text.size();

- // loop over digits

- for (; start < end; ++start) {

- unsigned char c = static_cast<unsigned char>(start[0]);

- int digit = c - '0';

- if (digit >= base || digit < 0) {

- *value_p = value;

- return false;

- }

- if (value > vmax_over_base) {

- *value_p = vmax;

- return false;

- }

- value *= base;

- if (value > vmax - digit) {

- *value_p = vmax;

- return false;

- }

- value += digit;

- }

- *value_p = value;

- return true;

-template<typename IntType>

-bool safe_parse_negative_int(

- const string& text, IntType* value_p) {

- int base = 10;

- IntType value = 0;

- const IntType vmin = std::numeric_limits<IntType>::min();

- assert(vmin < 0);

- assert(vmin <= 0 - base);

- IntType vmin_over_base = vmin / base;

- // 2003 c++ standard [expr.mul]

- // "... the sign of the remainder is implementation-defined."

- // Although (vmin/base)*base + vmin%base is always vmin.

- // 2011 c++ standard tightens the spec but we cannot rely on it.

- if (vmin % base > 0) {

- vmin_over_base += 1;

- }

- const char* start = text.data();

- const char* end = start + text.size();

- // loop over digits

- for (; start < end; ++start) {

- unsigned char c = static_cast<unsigned char>(start[0]);

- int digit = c - '0';

- if (digit >= base || digit < 0) {

- *value_p = value;

- return false;

- }

- if (value < vmin_over_base) {

- *value_p = vmin;

- return false;

- }

- value *= base;

- if (value < vmin + digit) {

- *value_p = vmin;

- return false;

- }

- value -= digit;

- }

- *value_p = value;

- return true;

-template<typename IntType>

-bool safe_int_internal(string text, IntType* value_p) {

- *value_p = 0;

- bool negative;

- if (!safe_parse_sign(&text, &negative)) {

- return false;

- }

- if (!negative) {

- return safe_parse_positive_int(text, value_p);

- } else {

- return safe_parse_negative_int(text, value_p);

- }

-template<typename IntType>

-bool safe_uint_internal(string text, IntType* value_p) {

- *value_p = 0;

- bool negative;

- if (!safe_parse_sign(&text, &negative) || negative) {

- return false;

- }

- return safe_parse_positive_int(text, value_p);

// ----------------------------------------------------------------------

// FastIntToBuffer()

// FastInt64ToBuffer()

@@ -1199,7 +1056,7 @@ char* DoubleToBuffer(double value, char* buffer) {

} else if (value == -numeric_limits<double>::infinity()) {

strcpy(buffer, "-inf");

return buffer;

- } else if (MathLimits<double>::IsNaN(value)) {

+ } else if (IsNaN(value)) {

strcpy(buffer, "nan");

return buffer;

}

@@ -1230,41 +1087,6 @@ char* DoubleToBuffer(double value, char* buffer) {

return buffer;

}

-static int memcasecmp(const char *s1, const char *s2, size_t len) {

- const unsigned char *us1 = reinterpret_cast<const unsigned char *>(s1);

- const unsigned char *us2 = reinterpret_cast<const unsigned char *>(s2);

- for ( int i = 0; i < len; i++ ) {

- const int diff =

- static_cast<int>(static_cast<unsigned char>(ascii_tolower(us1[i]))) -

- static_cast<int>(static_cast<unsigned char>(ascii_tolower(us2[i])));

- if (diff != 0) return diff;

- }

- return 0;

-inline bool CaseEqual(StringPiece s1, StringPiece s2) {

- if (s1.size() != s2.size()) return false;

- return memcasecmp(s1.data(), s2.data(), s1.size()) == 0;

-bool safe_strtob(StringPiece str, bool* value) {

- GOOGLE_CHECK(value != NULL) << "NULL output boolean given.";

- if (CaseEqual(str, "true") || CaseEqual(str, "t") ||

- CaseEqual(str, "yes") || CaseEqual(str, "y") ||

- CaseEqual(str, "1")) {

- *value = true;

- return true;

- }

- if (CaseEqual(str, "false") || CaseEqual(str, "f") ||

- CaseEqual(str, "no") || CaseEqual(str, "n") ||

- CaseEqual(str, "0")) {

- *value = false;

- return true;

- }

- return false;

bool safe_strtof(const char* str, float* value) {

char* endptr;

errno = 0; // errno only gets set on errors

@@ -1276,34 +1098,6 @@ bool safe_strtof(const char* str, float* value) {

return *str != 0 && *endptr == 0 && errno == 0;

}

-bool safe_strtod(const char* str, double* value) {

- char* endptr;

- *value = strtod(str, &endptr);

- if (endptr != str) {

- while (ascii_isspace(*endptr)) ++endptr;

- }

- // Ignore range errors from strtod. The values it

- // returns on underflow and overflow are the right

- // fallback in a robust setting.

- return *str != '\0' && *endptr == '\0';

-bool safe_strto32(const string& str, int32* value) {

- return safe_int_internal(str, value);

-bool safe_strtou32(const string& str, uint32* value) {

- return safe_uint_internal(str, value);

-bool safe_strto64(const string& str, int64* value) {

- return safe_int_internal(str, value);

-bool safe_strtou64(const string& str, uint64* value) {

- return safe_uint_internal(str, value);

char* FloatToBuffer(float value, char* buffer) {

// FLT_DIG is 6 for IEEE-754 floats, which are used on almost all

// platforms these days. Just in case some system exists where FLT_DIG

@@ -1317,7 +1111,7 @@ char* FloatToBuffer(float value, char* buffer) {

} else if (value == -numeric_limits<double>::infinity()) {

strcpy(buffer, "-inf");

return buffer;

- } else if (MathLimits<float>::IsNaN(value)) {

+ } else if (IsNaN(value)) {

strcpy(buffer, "nan");

return buffer;

}

@@ -1342,893 +1136,68 @@ char* FloatToBuffer(float value, char* buffer) {

return buffer;

}

-namespace strings {

-AlphaNum::AlphaNum(strings::Hex hex) {

- char *const end = &digits[kFastToBufferSize];

- char *writer = end;

- uint64 value = hex.value;

- uint64 width = hex.spec;

- // We accomplish minimum width by OR'ing in 0x10000 to the user's value,

- // where 0x10000 is the smallest hex number that is as wide as the user

- // asked for.

- uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value;

- static const char hexdigits[] = "0123456789abcdef";

- do {

- *--writer = hexdigits[value & 0xF];

- value >>= 4;

- mask >>= 4;

- } while (mask != 0);

- piece_data_ = writer;

- piece_size_ = end - writer;

-} // namespace strings

// ----------------------------------------------------------------------

-// StrCat()

-// This merges the given strings or integers, with no delimiter. This

-// is designed to be the fastest possible way to construct a string out

-// of a mix of raw C strings, C++ strings, and integer values.

+// NoLocaleStrtod()

+// This code will make you cry.

// ----------------------------------------------------------------------

-// Append is merely a version of memcpy that returns the address of the byte

-// after the area just overwritten. It comes in multiple flavors to minimize

-// call overhead.

-static char *Append1(char *out, const AlphaNum &x) {

- memcpy(out, x.data(), x.size());

- return out + x.size();

-static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) {

- memcpy(out, x1.data(), x1.size());

- out += x1.size();

- memcpy(out, x2.data(), x2.size());

- return out + x2.size();

-static char *Append4(char *out,

- const AlphaNum &x1, const AlphaNum &x2,

- const AlphaNum &x3, const AlphaNum &x4) {

- memcpy(out, x1.data(), x1.size());

- out += x1.size();

- memcpy(out, x2.data(), x2.size());

- out += x2.size();

- memcpy(out, x3.data(), x3.size());

- out += x3.size();

- memcpy(out, x4.data(), x4.size());

- return out + x4.size();

-string StrCat(const AlphaNum &a, const AlphaNum &b) {

- string result;

- result.resize(a.size() + b.size());

- char *const begin = &*result.begin();

- char *out = Append2(begin, a, b);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {

- string result;

- result.resize(a.size() + b.size() + c.size());

- char *const begin = &*result.begin();

- char *out = Append2(begin, a, b);

- out = Append1(out, c);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,

- const AlphaNum &d) {

- string result;

- result.resize(a.size() + b.size() + c.size() + d.size());

- char *const begin = &*result.begin();

- char *out = Append4(begin, a, b, c, d);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,

- const AlphaNum &d, const AlphaNum &e) {

- string result;

- result.resize(a.size() + b.size() + c.size() + d.size() + e.size());

- char *const begin = &*result.begin();

- char *out = Append4(begin, a, b, c, d);

- out = Append1(out, e);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,

- const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {

- string result;

- result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +

- f.size());

- char *const begin = &*result.begin();

- char *out = Append4(begin, a, b, c, d);

- out = Append2(out, e, f);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,

- const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,

- const AlphaNum &g) {

+// Returns a string identical to *input except that the character pointed to

+// by radix_pos (which should be '.') is replaced with the locale-specific

+// radix character.

+string LocalizeRadix(const char* input, const char* radix_pos) {

+ // Determine the locale-specific radix character by calling sprintf() to

+ // print the number 1.5, then stripping off the digits. As far as I can

+ // tell, this is the only portable, thread-safe way to get the C library

+ // to divulge the locale's radix character. No, localeconv() is NOT

+ // thread-safe.

+ char temp[16];

+ int size = sprintf(temp, "%.1f", 1.5);

+ GOOGLE_CHECK_EQ(temp[0], '1');

+ GOOGLE_CHECK_EQ(temp[size-1], '5');

+ GOOGLE_CHECK_LE(size, 6);

+ // Now replace the '.' in the input with it.

string result;

- result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +

- f.size() + g.size());

- char *const begin = &*result.begin();

- char *out = Append4(begin, a, b, c, d);

- out = Append2(out, e, f);

- out = Append1(out, g);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

+ result.reserve(strlen(input) + size - 3);

+ result.append(input, radix_pos);

+ result.append(temp + 1, size - 2);

+ result.append(radix_pos + 1);

return result;

}

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,

- const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,

- const AlphaNum &g, const AlphaNum &h) {

- string result;

- result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +

- f.size() + g.size() + h.size());

- char *const begin = &*result.begin();

- char *out = Append4(begin, a, b, c, d);

- out = Append4(out, e, f, g, h);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,

- const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,

- const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {

- string result;

- result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +

- f.size() + g.size() + h.size() + i.size());

- char *const begin = &*result.begin();

- char *out = Append4(begin, a, b, c, d);

- out = Append4(out, e, f, g, h);

- out = Append1(out, i);

- GOOGLE_DCHECK_EQ(out, begin + result.size());

- return result;

-// It's possible to call StrAppend with a char * pointer that is partway into

-// the string we're appending to. However the results of this are random.

-// Therefore, check for this in debug mode. Use unsigned math so we only have

-// to do one comparison.

-#define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \

- GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \

- uintptr_t((dest).size()))

-void StrAppend(string *result, const AlphaNum &a) {

- GOOGLE_DCHECK_NO_OVERLAP(*result, a);

- result->append(a.data(), a.size());

-void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {

- GOOGLE_DCHECK_NO_OVERLAP(*result, a);

- GOOGLE_DCHECK_NO_OVERLAP(*result, b);

- string::size_type old_size = result->size();

- result->resize(old_size + a.size() + b.size());

- char *const begin = &*result->begin();

- char *out = Append2(begin + old_size, a, b);

- GOOGLE_DCHECK_EQ(out, begin + result->size());

-void StrAppend(string *result,

- const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {

- GOOGLE_DCHECK_NO_OVERLAP(*result, a);

- GOOGLE_DCHECK_NO_OVERLAP(*result, b);

- GOOGLE_DCHECK_NO_OVERLAP(*result, c);

- string::size_type old_size = result->size();

- result->resize(old_size + a.size() + b.size() + c.size());

- char *const begin = &*result->begin();

- char *out = Append2(begin + old_size, a, b);

- out = Append1(out, c);

- GOOGLE_DCHECK_EQ(out, begin + result->size());

-void StrAppend(string *result,

- const AlphaNum &a, const AlphaNum &b,

- const AlphaNum &c, const AlphaNum &d) {

- GOOGLE_DCHECK_NO_OVERLAP(*result, a);

- GOOGLE_DCHECK_NO_OVERLAP(*result, b);

- GOOGLE_DCHECK_NO_OVERLAP(*result, c);

- GOOGLE_DCHECK_NO_OVERLAP(*result, d);

- string::size_type old_size = result->size();

- result->resize(old_size + a.size() + b.size() + c.size() + d.size());

- char *const begin = &*result->begin();

- char *out = Append4(begin + old_size, a, b, c, d);

- GOOGLE_DCHECK_EQ(out, begin + result->size());

-int GlobalReplaceSubstring(const string& substring,

- const string& replacement,

- string* s) {

- GOOGLE_CHECK(s != NULL);

- if (s->empty() || substring.empty())

- return 0;

- string tmp;

- int num_replacements = 0;

- int pos = 0;

- for (int match_pos = s->find(substring.data(), pos, substring.length());

- match_pos != string::npos;

- pos = match_pos + substring.length(),

- match_pos = s->find(substring.data(), pos, substring.length())) {

- ++num_replacements;

- // Append the original content before the match.

- tmp.append(*s, pos, match_pos - pos);

- // Append the replacement for the match.

- tmp.append(replacement.begin(), replacement.end());

- }

- // Append the content after the last match. If no replacements were made, the

- // original string is left untouched.

- if (num_replacements > 0) {

- tmp.append(*s, pos, s->length() - pos);

- s->swap(tmp);

- }

- return num_replacements;

-int CalculateBase64EscapedLen(int input_len, bool do_padding) {

- // Base64 encodes three bytes of input at a time. If the input is not

- // divisible by three, we pad as appropriate.

- //

- // (from http://tools.ietf.org/html/rfc3548)

- // Special processing is performed if fewer than 24 bits are available

- // at the end of the data being encoded. A full encoding quantum is

- // always completed at the end of a quantity. When fewer than 24 input

- // bits are available in an input group, zero bits are added (on the

- // right) to form an integral number of 6-bit groups. Padding at the

- // end of the data is performed using the '=' character. Since all base

- // 64 input is an integral number of octets, only the following cases

- // can arise:

- // Base64 encodes each three bytes of input into four bytes of output.

- int len = (input_len / 3) * 4;

- if (input_len % 3 == 0) {

- // (from http://tools.ietf.org/html/rfc3548)

- // (1) the final quantum of encoding input is an integral multiple of 24

- // bits; here, the final unit of encoded output will be an integral

- // multiple of 4 characters with no "=" padding,

- } else if (input_len % 3 == 1) {

- // (from http://tools.ietf.org/html/rfc3548)

- // (2) the final quantum of encoding input is exactly 8 bits; here, the

- // final unit of encoded output will be two characters followed by two

- // "=" padding characters, or

- len += 2;

- if (do_padding) {

- len += 2;

- }

- } else { // (input_len % 3 == 2)

- // (from http://tools.ietf.org/html/rfc3548)

- // (3) the final quantum of encoding input is exactly 16 bits; here, the

- // final unit of encoded output will be three characters followed by one

- // "=" padding character.

- len += 3;

- if (do_padding) {

- len += 1;

- }

- assert(len >= input_len); // make sure we didn't overflow

- return len;

-// Base64Escape does padding, so this calculation includes padding.

-int CalculateBase64EscapedLen(int input_len) {

- return CalculateBase64EscapedLen(input_len, true);

-// ----------------------------------------------------------------------

-// int Base64Unescape() - base64 decoder

-// int Base64Escape() - base64 encoder

-// int WebSafeBase64Unescape() - Google's variation of base64 decoder

-// int WebSafeBase64Escape() - Google's variation of base64 encoder

-//

-// Check out

-// http://tools.ietf.org/html/rfc2045 for formal description, but what we

-// care about is that...

-// Take the encoded stuff in groups of 4 characters and turn each

-// character into a code 0 to 63 thus:

-// A-Z map to 0 to 25

-// a-z map to 26 to 51

-// 0-9 map to 52 to 61

-// +(- for WebSafe) maps to 62

-// /(_ for WebSafe) maps to 63

-// There will be four numbers, all less than 64 which can be represented

-// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).

-// Arrange the 6 digit binary numbers into three bytes as such:

-// aaaaaabb bbbbcccc ccdddddd

-// Equals signs (one or two) are used at the end of the encoded block to

-// indicate that the text was not an integer multiple of three bytes long.

-// ----------------------------------------------------------------------

-int Base64UnescapeInternal(const char *src_param, int szsrc,

- char *dest, int szdest,

- const signed char* unbase64) {

- static const char kPad64Equals = '=';

- static const char kPad64Dot = '.';

- int decode = 0;

- int destidx = 0;

- int state = 0;

- unsigned int ch = 0;

- unsigned int temp = 0;

- // If "char" is signed by default, using *src as an array index results in

- // accessing negative array elements. Treat the input as a pointer to

- // unsigned char to avoid this.

- const unsigned char *src = reinterpret_cast<const unsigned char*>(src_param);

- // The GET_INPUT macro gets the next input character, skipping

- // over any whitespace, and stopping when we reach the end of the

- // string or when we read any non-data character. The arguments are

- // an arbitrary identifier (used as a label for goto) and the number

- // of data bytes that must remain in the input to avoid aborting the

- // loop.

-#define GET_INPUT(label, remain) \

- label: \

- --szsrc; \

- ch = *src++; \

- decode = unbase64[ch]; \

- if (decode < 0) { \

- if (ascii_isspace(ch) && szsrc >= remain) \

- goto label; \

- state = 4 - remain; \

- break; \

- }

- // if dest is null, we're just checking to see if it's legal input

- // rather than producing output. (I suspect this could just be done

- // with a regexp...). We duplicate the loop so this test can be

- // outside it instead of in every iteration.

- if (dest) {

- // This loop consumes 4 input bytes and produces 3 output bytes

- // per iteration. We can't know at the start that there is enough

- // data left in the string for a full iteration, so the loop may

- // break out in the middle; if so 'state' will be set to the

- // number of input bytes read.

- while (szsrc >= 4) {

- // We'll start by optimistically assuming that the next four

- // bytes of the string (src[0..3]) are four good data bytes

- // (that is, no nulls, whitespace, padding chars, or illegal

- // chars). We need to test src[0..2] for nulls individually

- // before constructing temp to preserve the property that we

- // never read past a null in the string (no matter how long

- // szsrc claims the string is).

- if (!src[0] || !src[1] || !src[2] ||

- (temp = ((unsigned(unbase64[src[0]]) << 18) |

- (unsigned(unbase64[src[1]]) << 12) |

- (unsigned(unbase64[src[2]]) << 6) |

- (unsigned(unbase64[src[3]])))) & 0x80000000) {

- // Iff any of those four characters was bad (null, illegal,

- // whitespace, padding), then temp's high bit will be set

- // (because unbase64[] is -1 for all bad characters).

- //

- // We'll back up and resort to the slower decoder, which knows

- // how to handle those cases.

- GET_INPUT(first, 4);

- temp = decode;

- GET_INPUT(second, 3);

- temp = (temp << 6) | decode;

- GET_INPUT(third, 2);

- temp = (temp << 6) | decode;

- GET_INPUT(fourth, 1);

- temp = (temp << 6) | decode;

- } else {

- // We really did have four good data bytes, so advance four

- // characters in the string.

- szsrc -= 4;

- src += 4;

- decode = -1;

- ch = '\0';

- }

- // temp has 24 bits of input, so write that out as three bytes.

- if (destidx+3 > szdest) return -1;

- dest[destidx+2] = temp;

- temp >>= 8;

- dest[destidx+1] = temp;

- temp >>= 8;

- dest[destidx] = temp;

- destidx += 3;

- }

- } else {

- while (szsrc >= 4) {

- if (!src[0] || !src[1] || !src[2] ||

- (temp = ((unsigned(unbase64[src[0]]) << 18) |

- (unsigned(unbase64[src[1]]) << 12) |

- (unsigned(unbase64[src[2]]) << 6) |

- (unsigned(unbase64[src[3]])))) & 0x80000000) {

- GET_INPUT(first_no_dest, 4);

- GET_INPUT(second_no_dest, 3);

- GET_INPUT(third_no_dest, 2);

- GET_INPUT(fourth_no_dest, 1);

- } else {

- szsrc -= 4;

- src += 4;

- decode = -1;

- ch = '\0';

- }

- destidx += 3;

- }

-#undef GET_INPUT

- // if the loop terminated because we read a bad character, return

- // now.

- if (decode < 0 && ch != '\0' &&

- ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(ch))

- return -1;

- if (ch == kPad64Equals || ch == kPad64Dot) {

- // if we stopped by hitting an '=' or '.', un-read that character -- we'll

- // look at it again when we count to check for the proper number of

- // equals signs at the end.

- ++szsrc;

- --src;

- } else {

- // This loop consumes 1 input byte per iteration. It's used to

- // clean up the 0-3 input bytes remaining when the first, faster

- // loop finishes. 'temp' contains the data from 'state' input

- // characters read by the first loop.

- while (szsrc > 0) {

- --szsrc;

- ch = *src++;

- decode = unbase64[ch];

- if (decode < 0) {

- if (ascii_isspace(ch)) {

- continue;

- } else if (ch == '\0') {

- break;

- } else if (ch == kPad64Equals || ch == kPad64Dot) {

- // back up one character; we'll read it again when we check

- // for the correct number of pad characters at the end.

- ++szsrc;

- --src;

- break;

- } else {

- return -1;

- }

- // Each input character gives us six bits of output.

- temp = (temp << 6) | decode;

- ++state;

- if (state == 4) {

- // If we've accumulated 24 bits of output, write that out as

- // three bytes.

- if (dest) {

- if (destidx+3 > szdest) return -1;

- dest[destidx+2] = temp;

- temp >>= 8;

- dest[destidx+1] = temp;

- temp >>= 8;

- dest[destidx] = temp;

- }

- destidx += 3;

- state = 0;

- temp = 0;

- }

+double NoLocaleStrtod(const char* text, char** original_endptr) {

+ // We cannot simply set the locale to "C" temporarily with setlocale()

+ // as this is not thread-safe. Instead, we try to parse in the current

+ // locale first. If parsing stops at a '.' character, then this is a

+ // pretty good hint that we're actually in some other locale in which

+ // '.' is not the radix character.

+ char* temp_endptr;

+ double result = strtod(text, &temp_endptr);

+ if (original_endptr != NULL) *original_endptr = temp_endptr;

+ if (*temp_endptr != '.') return result;

+ // Parsing halted on a '.'. Perhaps we're in a different locale? Let's

+ // try to replace the '.' with a locale-specific radix character and

+ // try again.

+ string localized = LocalizeRadix(text, temp_endptr);

+ const char* localized_cstr = localized.c_str();

+ char* localized_endptr;

+ result = strtod(localized_cstr, &localized_endptr);

+ if ((localized_endptr - localized_cstr) >

+ (temp_endptr - text)) {

+ // This attempt got further, so replacing the decimal must have helped.

+ // Update original_endptr to point at the right location.

+ if (original_endptr != NULL) {

+ // size_diff is non-zero if the localized radix has multiple bytes.

+ int size_diff = localized.size() - strlen(text);

+ // const_cast is necessary to match the strtod() interface.

+ *original_endptr = const_cast<char*>(

+ text + (localized_endptr - localized_cstr - size_diff));

}

- // Process the leftover data contained in 'temp' at the end of the input.

- int expected_equals = 0;

- switch (state) {

- case 0:

- // Nothing left over; output is a multiple of 3 bytes.

- break;

- case 1:

- // Bad input; we have 6 bits left over.

- return -1;

- case 2:

- // Produce one more output byte from the 12 input bits we have left.

- if (dest) {

- if (destidx+1 > szdest) return -1;

- temp >>= 4;

- dest[destidx] = temp;

- }

- ++destidx;

- expected_equals = 2;

- break;

- case 3:

- // Produce two more output bytes from the 18 input bits we have left.

- if (dest) {

- if (destidx+2 > szdest) return -1;

- temp >>= 2;

- dest[destidx+1] = temp;

- temp >>= 8;

- dest[destidx] = temp;

- }

- destidx += 2;

- expected_equals = 1;

- break;

- default:

- // state should have no other values at this point.

- GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;

- }

- // The remainder of the string should be all whitespace, mixed with

- // exactly 0 equals signs, or exactly 'expected_equals' equals

- // signs. (Always accepting 0 equals signs is a google extension

- // not covered in the RFC, as is accepting dot as the pad character.)

- int equals = 0;

- while (szsrc > 0 && *src) {

- if (*src == kPad64Equals || *src == kPad64Dot)

- ++equals;

- else if (!ascii_isspace(*src))

- return -1;

- --szsrc;

- ++src;

- }

- return (equals == 0 || equals == expected_equals) ? destidx : -1;

-// The arrays below were generated by the following code

-// #include <sys/time.h>

-// #include <stdlib.h>

-// #include <string.h>

-// main()

-// {

-// static const char Base64[] =

-// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

-// char *pos;

-// int idx, i, j;

-// printf(" ");

-// for (i = 0; i < 255; i += 8) {

-// for (j = i; j < i + 8; j++) {

-// pos = strchr(Base64, j);

-// if ((pos == NULL) || (j == 0))

-// idx = -1;

-// else

-// idx = pos - Base64;

-// if (idx == -1)

-// printf(" %2d, ", idx);

-// else

-// printf(" %2d/*%c*/,", idx, j);

-// }

-// printf("\n ");

-// }

-//

-// where the value of "Base64[]" was replaced by one of the base-64 conversion

-// tables from the functions below.

-static const signed char kUnBase64[] = {

- -1, -1, -1, -1, -1, -1, -1, -1,

- -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,

- 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,

- 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,

- -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,

- 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,

- 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,

- 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,

- -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,

- 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,

- 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,

- 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,

- -1, -1, -1, -1, -1, -1, -1, -1,

- -1, -1, -1, -1, -1, -1, -1, -1

-};

-static const signed char kUnWebSafeBase64[] = {

- -1, -1, -1, -1, -1, -1, -1, -1,

- -1, -1, -1, -1, -1, 62/*-*/, -1, -1,

- 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,

- 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,

- -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,

- 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,

- 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,

- 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/,

- -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,

- 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,

- 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,

- 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,

- -1, -1, -1, -1, -1, -1, -1, -1,

- -1, -1, -1, -1, -1, -1, -1, -1

-};

-int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) {

- return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64);

-static bool Base64UnescapeInternal(const char* src, int slen, string* dest,

- const signed char* unbase64) {

- // Determine the size of the output string. Base64 encodes every 3 bytes into

- // 4 characters. Any leftover chars are added directly for good measure.

- // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548

- const int dest_len = 3 * (slen / 4) + (slen % 4);

- dest->resize(dest_len);

- // We are getting the destination buffer by getting the beginning of the

- // string and converting it into a char *.

- const int len = Base64UnescapeInternal(src, slen, string_as_array(dest),

- dest_len, unbase64);

- if (len < 0) {

- dest->clear();

- return false;

- }

- // The result could be shorter than dest_len if the input was padded.

- GOOGLE_DCHECK_LE(len, dest_len);

- dest->erase(len);

- return true;

-bool Base64Unescape(StringPiece src, string* dest) {

- return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);

-bool WebSafeBase64Unescape(StringPiece src, string* dest) {

- return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);

-int Base64EscapeInternal(const unsigned char *src, int szsrc,

- char *dest, int szdest, const char *base64,

- bool do_padding) {

- static const char kPad64 = '=';

- if (szsrc <= 0) return 0;

- if (szsrc * 4 > szdest * 3) return 0;

- char *cur_dest = dest;

- const unsigned char *cur_src = src;

- char *limit_dest = dest + szdest;

- const unsigned char *limit_src = src + szsrc;

- // Three bytes of data encode to four characters of cyphertext.

- // So we can pump through three-byte chunks atomically.

- while (cur_src < limit_src - 3) { // keep going as long as we have >= 32 bits

- uint32 in = BigEndian::Load32(cur_src) >> 8;

- cur_dest[0] = base64[in >> 18];

- in &= 0x3FFFF;

- cur_dest[1] = base64[in >> 12];

- in &= 0xFFF;

- cur_dest[2] = base64[in >> 6];

- in &= 0x3F;

- cur_dest[3] = base64[in];

- cur_dest += 4;

- cur_src += 3;

- }

- // To save time, we didn't update szdest or szsrc in the loop. So do it now.

- szdest = limit_dest - cur_dest;

- szsrc = limit_src - cur_src;

- /* now deal with the tail (<=3 bytes) */

- switch (szsrc) {

- case 0:

- // Nothing left; nothing more to do.

- break;

- case 1: {

- // One byte left: this encodes to two characters, and (optionally)

- // two pad characters to round out the four-character cypherblock.

- if ((szdest -= 2) < 0) return 0;

- uint32 in = cur_src[0];

- cur_dest[0] = base64[in >> 2];

- in &= 0x3;

- cur_dest[1] = base64[in << 4];

- cur_dest += 2;

- if (do_padding) {

- if ((szdest -= 2) < 0) return 0;

- cur_dest[0] = kPad64;

- cur_dest[1] = kPad64;

- cur_dest += 2;

- }

- break;

- }

- case 2: {

- // Two bytes left: this encodes to three characters, and (optionally)

- // one pad character to round out the four-character cypherblock.

- if ((szdest -= 3) < 0) return 0;

- uint32 in = BigEndian::Load16(cur_src);

- cur_dest[0] = base64[in >> 10];

- in &= 0x3FF;

- cur_dest[1] = base64[in >> 4];

- in &= 0x00F;

- cur_dest[2] = base64[in << 2];

- cur_dest += 3;

- if (do_padding) {

- if ((szdest -= 1) < 0) return 0;

- cur_dest[0] = kPad64;

- cur_dest += 1;

- }

- break;

- }

- case 3: {

- // Three bytes left: same as in the big loop above. We can't do this in

- // the loop because the loop above always reads 4 bytes, and the fourth

- // byte is past the end of the input.

- if ((szdest -= 4) < 0) return 0;

- uint32 in = (cur_src[0] << 16) + BigEndian::Load16(cur_src + 1);

- cur_dest[0] = base64[in >> 18];

- in &= 0x3FFFF;

- cur_dest[1] = base64[in >> 12];

- in &= 0xFFF;

- cur_dest[2] = base64[in >> 6];

- in &= 0x3F;

- cur_dest[3] = base64[in];

- cur_dest += 4;

- break;

- }

- default:

- // Should not be reached: the while loop before this switch statement

- // only exits once at most 3 bytes of input remain.

- GOOGLE_LOG(FATAL) << "Logic problem? szsrc = " << szsrc;

- break;

- }

- return (cur_dest - dest);

-static const char kBase64Chars[] =

-"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

-static const char kWebSafeBase64Chars[] =

-"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

-int Base64Escape(const unsigned char *src, int szsrc, char *dest, int szdest) {

- return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);

-int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest,

- int szdest, bool do_padding) {

- return Base64EscapeInternal(src, szsrc, dest, szdest,

- kWebSafeBase64Chars, do_padding);

-void Base64EscapeInternal(const unsigned char* src, int szsrc,

- string* dest, bool do_padding,

- const char* base64_chars) {

- const int calc_escaped_size =

- CalculateBase64EscapedLen(szsrc, do_padding);

- dest->resize(calc_escaped_size);

- const int escaped_len = Base64EscapeInternal(src, szsrc,

- string_as_array(dest),

- dest->size(),

- base64_chars,

- do_padding);

- GOOGLE_DCHECK_EQ(calc_escaped_size, escaped_len);

- dest->erase(escaped_len);

-void Base64Escape(const unsigned char *src, int szsrc,

- string* dest, bool do_padding) {

- Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);

-void WebSafeBase64Escape(const unsigned char *src, int szsrc,

- string *dest, bool do_padding) {

- Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars);

-void Base64Escape(StringPiece src, string* dest) {

- Base64Escape(reinterpret_cast<const unsigned char*>(src.data()),

- src.size(), dest, true);

-void WebSafeBase64Escape(StringPiece src, string* dest) {

- WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),

- src.size(), dest, false);

-void WebSafeBase64EscapeWithPadding(StringPiece src, string* dest) {

- WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),

- src.size(), dest, true);

-// Helper to encode a Unicode code point as UTF-8 into the output buffer,

-// without bringing in any external dependencies. Returns the byte count.

-int EncodeAsUTF8Char(uint32 code_point, char* output) {

- uint32 tmp = 0;

- int len = 0;

- if (code_point <= 0x7f) {

- tmp = code_point;

- len = 1;

- } else if (code_point <= 0x07ff) {

- tmp = 0x0000c080 |

- ((code_point & 0x07c0) << 2) |

- (code_point & 0x003f);

- len = 2;

- } else if (code_point <= 0xffff) {

- tmp = 0x00e08080 |

- ((code_point & 0xf000) << 4) |

- ((code_point & 0x0fc0) << 2) |

- (code_point & 0x003f);

- len = 3;

- } else {

- // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is

- // normally only defined up to there as well.

- tmp = 0xf0808080 |

- ((code_point & 0x1c0000) << 6) |

- ((code_point & 0x03f000) << 4) |

- ((code_point & 0x000fc0) << 2) |

- (code_point & 0x003f);

- len = 4;

- }

- tmp = ghtonl(tmp);

- memcpy(output, reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);

- return len;

-// Table of UTF-8 character lengths, based on first byte

-static const unsigned char kUTF8LenTbl[256] = {

- 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,

- 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,

- 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4

-};

-// Return length of a single UTF-8 source character

-int UTF8FirstLetterNumBytes(const char* src, int len) {

- if (len == 0) {

- return 0;

- }

- return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];

+ return result;

}

} // namespace protobuf