| Index: third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
|
| diff --git a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc b/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
|
| index 0f6afe6dc8e11fbbee10a641436679debf6d6b91..d79a6ee45007c99d7b2059d50b1ff37485a3138d 100644
|
| --- a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
|
| +++ b/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
|
| @@ -3,6 +3,8 @@
|
|
|
| #include <google/protobuf/stubs/common.h>
|
|
|
| +#include <google/protobuf/stubs/stringpiece.h>
|
| +
|
| namespace google {
|
| namespace protobuf {
|
| namespace internal {
|
| @@ -531,6 +533,56 @@ bool IsStructurallyValidUTF8(const char* buf, int len) {
|
| return (bytes_consumed == len);
|
| }
|
|
|
| +int UTF8SpnStructurallyValid(const StringPiece& str) {
|
| + if (!module_initialized_) return str.size();  // not initialized: no scan, full length reported
|
| + // Scan str from its start; the scan stores its consumed-byte count, which is returned.
|
| + int bytes_consumed = 0;
|
| + UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
|
| + str.data(), str.size(), &bytes_consumed);
|
| + return bytes_consumed;
|
| +}
|
| +
|
| +// Coerces the UTF-8 byte string src_str into a structurally valid string
|
| +// of the same length by overwriting each illegal byte with replace_char
|
| +// (typically blank); bytes inside valid spans are copied unchanged.
|
| +// replace_char must be legal printable 7-bit Ascii 0x20..0x7e.
|
| +// src_str is read-only. If any overwriting is needed, the modified bytes
|
| +// are written to idst, which must have room for src_str.length() bytes.
|
| +//
|
| +// Returns src_str.data() (const cast away) if no changes were made,
|
| +// or idst if some bytes were changed.
|
| +//
|
| +// Fast case: all is structurally valid and no byte copying is done.
|
| +//
|
| +char* UTF8CoerceToStructurallyValid(const StringPiece& src_str,
|
| + char* idst,
|
| + const char replace_char) {
|
| + const char* isrc = src_str.data();
|
| + const int len = src_str.length();
|
| + int n = UTF8SpnStructurallyValid(src_str);
|
| + if (n == len) { // Normal case -- whole input is valid, idst is untouched
|
| + return const_cast<char*>(isrc);
|
| + } else { // Unusual case -- copy to idst, replacing bad bytes
|
| + const char* src = isrc;
|
| + const char* srclimit = isrc + len;
|
| + char* dst = idst;
|
| + memmove(dst, src, n); // Copy initial good chunk
|
| + src += n;
|
| + dst += n;
|
| + while (src < srclimit) { // src points to the byte that stopped the last scan
|
| + dst[0] = replace_char; // replace one bad byte
|
| + src++;
|
| + dst++;
|
| + StringPiece str2(src, srclimit - src);
|
| + n = UTF8SpnStructurallyValid(str2); // scan the remainder
|
| + memmove(dst, src, n); // copy next good chunk
|
| + src += n;
|
| + dst += n;
|
| + }
|
| + }
|
| + return idst;  // reached only from the replacement branch above
|
| +}
|
| +
|
| } // namespace internal
|
| } // namespace protobuf
|
| } // namespace google
|
|
|