Index: third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc |
diff --git a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc b/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc |
index 0f6afe6dc8e11fbbee10a641436679debf6d6b91..d79a6ee45007c99d7b2059d50b1ff37485a3138d 100644 |
--- a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc |
+++ b/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc |
@@ -3,6 +3,8 @@ |
#include <google/protobuf/stubs/common.h> |
+#include <google/protobuf/stubs/stringpiece.h> |
+ |
namespace google { |
namespace protobuf { |
namespace internal { |
@@ -531,6 +533,56 @@ bool IsStructurallyValidUTF8(const char* buf, int len) { |
return (bytes_consumed == len); |
} |
+int UTF8SpnStructurallyValid(const StringPiece& str) { |
+ if (!module_initialized_) return str.size(); /* no scan: full length is returned */ |
+ /* Scan str; bytes_consumed receives the number of bytes the scanner consumed. */ |
+ int bytes_consumed = 0; |
+ UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, |
+ str.data(), str.size(), &bytes_consumed); |
+ return bytes_consumed; /* a value shorter than str's length means the scan stopped early */ |
+} |
+ |
+// Coerces the bytes of src_str into a structurally valid string of the |
+// same length by overwriting each illegal byte with replace_char |
+// (typically blank). |
+// replace_char must be legal printable 7-bit ASCII 0x20..0x7e (not checked here). |
+// src_str is only read; all writes go through idst. If any overwriting is |
+// needed, the result is built in idst, which receives src_str.length() bytes. |
+// |
+// Returns src_str.data() (cast to char*) if no changes were needed, |
+// or idst if at least one byte was replaced. |
+// |
+// Fast case: all is structurally valid and no byte copying is done. |
+// |
+char* UTF8CoerceToStructurallyValid(const StringPiece& src_str, |
+ char* idst, |
+ const char replace_char) { |
+ const char* isrc = src_str.data(); |
+ const int len = src_str.length(); |
+ int n = UTF8SpnStructurallyValid(src_str); |
+ if (n == len) { // Normal case -- whole input is valid, idst is left untouched |
+ return const_cast<char*>(isrc); |
+ } else { // Unusual case -- copy to idst, replacing each bad byte |
+ const char* src = isrc; |
+ const char* srclimit = isrc + len; |
+ char* dst = idst; |
+ memmove(dst, src, n); // Copy initial good chunk |
+ src += n; |
+ dst += n; |
+ while (src < srclimit) { // src points to bogus byte or is off the end |
+ dst[0] = replace_char; // replace one bad byte |
+ src++; |
+ dst++; |
+ StringPiece str2(src, srclimit - src); |
+ n = UTF8SpnStructurallyValid(str2); // scan the remainder (may be empty) |
+ memmove(dst, src, n); // copy next good chunk; src and dst advance in lockstep |
+ src += n; |
+ dst += n; |
+ } |
+ } |
+ return idst; |
+} |
+ |
} // namespace internal |
} // namespace protobuf |
} // namespace google |