Chromium Code Reviews| Index: third_party/WebKit/Source/wtf/text/UTF8.cpp |
| diff --git a/third_party/WebKit/Source/wtf/text/UTF8.cpp b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
| index 0beca10497c8e8411d13883305a172c28b264b45..28c4ae1b6e91c9e5670c76be20f7ff9bd739e372 100644 |
| --- a/third_party/WebKit/Source/wtf/text/UTF8.cpp |
| +++ b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
| @@ -442,5 +442,23 @@ bool equalLatin1WithUTF8(const LChar* a, const LChar* aEnd, const char* b, const |
| return equalWithUTF8Internal(a, aEnd, b, bEnd); |
| } |
| +bool isUTF8Encoded(const char* data, size_t length) |
| +{ |
| + // This cast is necessary because U8_NEXT uses int32_ts. |
|
tkent
2016/03/02 00:55:55
Is it possible that |length| is greater than 2^31?
Jinsuk Kim
2016/03/02 01:14:31
The document data (HTML, CSS, JS, etc.) can be theo
tkent
2016/03/02 01:36:39
OK. Even if |length| is larger than the maximum v
|
| + int32_t srcLen = static_cast<int32_t>(length); |
| + int32_t charIndex = 0; |
| + bool markDetected = false; |
| + |
| + while (charIndex < srcLen) { |
| + int32_t codePoint; |
| + if ((uint8_t)(data[charIndex]) >= 0x80) |
|
tkent
2016/03/02 00:55:55
Please do not use C-style type cast.
Jinsuk Kim
2016/03/02 01:14:31
Done.
|
| + markDetected = true; |
| + U8_NEXT(data, charIndex, srcLen, codePoint); |
| + if (!U_IS_UNICODE_CHAR(codePoint)) |
| + return false; |
| + } |
| + return markDetected; |
| +} |
| + |
| } // namespace Unicode |
| } // namespace WTF |