Index: third_party/WebKit/Source/wtf/text/UTF8.cpp |
diff --git a/third_party/WebKit/Source/wtf/text/UTF8.cpp b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
index 0beca10497c8e8411d13883305a172c28b264b45..28c4ae1b6e91c9e5670c76be20f7ff9bd739e372 100644 |
--- a/third_party/WebKit/Source/wtf/text/UTF8.cpp |
+++ b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
@@ -442,5 +442,23 @@ bool equalLatin1WithUTF8(const LChar* a, const LChar* aEnd, const char* b, const |
return equalWithUTF8Internal(a, aEnd, b, bEnd); |
} |
+// Reports whether |data| (|length| bytes) looks like genuinely UTF-8 encoded |
+// text: every sequence must decode to a code point accepted by |
+// U_IS_UNICODE_CHAR, AND at least one byte must be non-ASCII (>= 0x80). |
+// Consequently empty input and pure-ASCII input both return false. |
+bool isUTF8Encoded(const char* data, size_t length)
+{ |
+    // U8_NEXT works with int32_t offsets, so |length| has to be narrowed. |
+    // A length above INT32_MAX cannot be represented in int32_t; the |
+    // narrowing could then yield a negative or a wrapped-around smaller |
+    // value and only part of the buffer would be checked. Such input is |
+    // reported as "not confirmed UTF-8" instead. |
tkent
2016/03/02 00:55:55
is it possible that |length| is greater than 2^31?
Jinsuk Kim
2016/03/02 01:14:31
The document data (HTML, CSS, JS, etc) can be theo
tkent
2016/03/02 01:36:39
ok. Even if |length| is larger than the maximum v
|
+    if (length > static_cast<size_t>(INT32_MAX)) |
+        return false; |
+    int32_t srcLen = static_cast<int32_t>(length); |
+    int32_t charIndex = 0; |
+    bool markDetected = false; |
+ |
+    while (charIndex < srcLen) { |
+        int32_t codePoint; |
+        // Remember that a non-ASCII lead/trail byte was seen; checked before |
+        // U8_NEXT because that macro advances |charIndex|. |
+        if (static_cast<uint8_t>(data[charIndex]) >= 0x80) |
tkent
2016/03/02 00:55:55
Please do not use C-style type cast.
Jinsuk Kim
2016/03/02 01:14:31
Done.
|
+            markDetected = true; |
+        // Decodes one code point and moves |charIndex| past it. Per the ICU |
+        // documentation an ill-formed sequence produces a negative value, |
+        // which U_IS_UNICODE_CHAR rejects below. |
+        U8_NEXT(data, charIndex, srcLen, codePoint); |
+        if (!U_IS_UNICODE_CHAR(codePoint)) |
+            return false; |
+    } |
+    return markDetected; |
+} |
+ |
} // namespace Unicode |
} // namespace WTF |