Index: third_party/WebKit/Source/wtf/text/UTF8.cpp |
diff --git a/third_party/WebKit/Source/wtf/text/UTF8.cpp b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
index 0beca10497c8e8411d13883305a172c28b264b45..e78d0ad7ede75afeb237876599d28d44f9d66739 100644 |
--- a/third_party/WebKit/Source/wtf/text/UTF8.cpp |
+++ b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
@@ -442,5 +442,23 @@ bool equalLatin1WithUTF8(const LChar* a, const LChar* aEnd, const char* b, const |
return equalWithUTF8Internal(a, aEnd, b, bEnd); |
} |
+bool isUTF8Encoded(const char* data, size_t length) |
jungshik at Google
2016/04/03 00:52:04
Without looking at the header file or the function
Jinsuk Kim
2016/04/06 04:34:11
Chose isUTF8andNotASCII
|
+{ |
+ // This cast is necessary because U8_NEXT uses int32_ts. |
+ int32_t srcLen = static_cast<int32_t>(length); |
+ int32_t charIndex = 0; |
+ bool markDetected = false; |
jungshik at Google
2016/04/03 00:52:04
At first, it took me a while to figure out what th
Jinsuk Kim
2016/04/06 04:34:11
Done.
|
+ |
+ while (charIndex < srcLen) { |
+ int32_t codePoint; |
+ if (static_cast<uint8_t>(data[charIndex]) >= 0x80) |
+ markDetected = true; |
+ U8_NEXT(data, charIndex, srcLen, codePoint); |
+ if (!U_IS_UNICODE_CHAR(codePoint)) |
+ return false; |
+ } |
+ return markDetected; |
+} |
+ |
} // namespace Unicode |
} // namespace WTF |