| Index: third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp
|
| diff --git a/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp b/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp
|
| index 9d7a9d684a61c5ab985addfc70b0813c09a4f7a5..08c3e448c3b29419bae8a2a108d56fd057754b91 100644
|
| --- a/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp
|
| +++ b/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp
|
| @@ -4,8 +4,11 @@
|
|
|
| #include "platform/inspector_protocol/String16STL.h"
|
|
|
| +#include "platform/inspector_protocol/Platform.h"
|
| +
|
| #include <algorithm>
|
| #include <cctype>
|
| +#include <cstdio>
|
| #include <functional>
|
| #include <locale>
|
|
|
| @@ -274,6 +277,206 @@ ConversionResult convertUTF16ToUTF8(
|
| return result;
|
| }
|
|
|
| +/**
|
| + * Is this code point a BMP code point (U+0000..U+ffff)?
|
| + * The argument is cast to uint32_t first, so a negative value compares as a
|
| + * very large number and is reported as not being in the BMP.
|
| + * @param c 32-bit code point
|
| + * @return true if c is in U+0000..U+ffff, false otherwise
|
| + * @stable ICU 2.8
|
| + */
|
| +#define U_IS_BMP(c) ((uint32_t)(c) <= 0xffff)
|
| +
|
| +/**
|
| + * Is this code point a supplementary code point (U+10000..U+10ffff)?
|
| + * Subtracting 0x10000 and casting to uint32_t makes values below U+10000 wrap
|
| + * to large numbers, so a single comparison checks both ends of the range.
|
| + * @param c 32-bit code point
|
| + * @return true if c is in U+10000..U+10ffff, false otherwise
|
| + * @stable ICU 2.8
|
| + */
|
| +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c) - 0x10000) <= 0xfffff)
|
| +
|
| +/**
|
| + * Is this code point a surrogate (U+d800..U+dfff)?
|
| + * The mask clears the low 11 bits, which is exactly the width of the
|
| + * surrogate range, and compares what is left with 0xd800.
|
| + * @param c 32-bit code point
|
| + * @return true if c is in U+d800..U+dfff, false otherwise
|
| + * @stable ICU 2.4
|
| + */
|
| +#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
|
| +
|
| +/**
|
| + * Get the lead surrogate (0xd800..0xdbff) for a
|
| + * supplementary code point (0x10000..0x10ffff).
|
| + * 0xd7c0 is 0xd800 - (0x10000 >> 10), i.e. the surrogate base with the
|
| + * 0x10000 bias already removed. The result is only meaningful when the
|
| + * argument really is supplementary; the macro does not check.
|
| + * @param supplementary 32-bit code point (U+10000..U+10ffff)
|
| + * @return lead surrogate (U+d800..U+dbff) for supplementary
|
| + * @stable ICU 2.4
|
| + */
|
| +#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
|
| +
|
| +/**
|
| + * Get the trail surrogate (0xdc00..0xdfff) for a
|
| + * supplementary code point (0x10000..0x10ffff).
|
| + * Keeps the low 10 bits of the code point and ORs in the 0xdc00 base.
|
| + * @param supplementary 32-bit code point (U+10000..U+10ffff)
|
| + * @return trail surrogate (U+dc00..U+dfff) for supplementary
|
| + * @stable ICU 2.4
|
| + */
|
| +#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)
|
| +
|
| +// Checks whether the |length| bytes starting at |source| form one well-formed
|
| +// UTF-8 sequence. This must be called with the length pre-determined by the
|
| +// first byte, and all |length| bytes must be readable.
|
| +// Any length outside 1..4 returns false. The Unicode
|
| +// definition of UTF-8 goes up to 4-byte sequences.
|
| +static bool isLegalUTF8(const unsigned char* source, int length)
|
| +{
|
| + unsigned char a;
|
| + // Start one past the last byte; each case below steps back one byte, so the
|
| + // sequence is validated from its end towards its lead byte.
|
| + const unsigned char* srcptr = source + length;
|
| + switch (length) {
|
| + default:
|
| + return false;
|
| + // Everything else falls through when "true"...
|
| + // Fourth byte must be a continuation byte (0x80..0xBF).
|
| + case 4:
|
| + if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
|
| + return false;
|
| + // Third byte must be a continuation byte (0x80..0xBF).
|
| + case 3:
|
| + if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
|
| + return false;
|
| + // Second byte: only the upper bound is checked here; the lower bound
|
| + // depends on the lead byte and is checked by the inner switch.
|
| + case 2:
|
| + if ((a = (*--srcptr)) > 0xBF)
|
| + return false;
|
| +
|
| + // no fall-through in this inner switch
|
| + switch (*source) {
|
| + // 0xE0 with a second byte below 0xA0 would be an overlong 3-byte form.
|
| + case 0xE0:
|
| + if (a < 0xA0)
|
| + return false;
|
| + break;
|
| + // 0xED with a second byte above 0x9F would encode a surrogate (U+D800..U+DFFF).
|
| + case 0xED:
|
| + if (a > 0x9F)
|
| + return false;
|
| + break;
|
| + // 0xF0 with a second byte below 0x90 would be an overlong 4-byte form.
|
| + case 0xF0:
|
| + if (a < 0x90)
|
| + return false;
|
| + break;
|
| + // 0xF4 with a second byte above 0x8F would encode a value beyond U+10FFFF.
|
| + case 0xF4:
|
| + if (a > 0x8F)
|
| + return false;
|
| + break;
|
| + // Any other lead byte: the second byte just has to be a continuation byte.
|
| + default:
|
| + if (a < 0x80)
|
| + return false;
|
| + }
|
| +
|
| + // Lead byte: 0x80..0xBF are continuation bytes and 0xC0/0xC1 could only
|
| + // start overlong 2-byte forms, so none of them may begin a sequence.
|
| + case 1:
|
| + if (*source >= 0x80 && *source < 0xC2)
|
| + return false;
|
| + }
|
| + // Lead bytes above 0xF4 would encode values beyond U+10FFFF.
|
| + if (*source > 0xF4)
|
| + return false;
|
| + return true;
|
| +}
|
| +
|
| +// Per-length constants subtracted from the raw value accumulated by
|
| +// readUTF8Sequence() in order to strip the UTF-8 marker bits. The table is
|
| +// indexed by (sequence length - 1). Each entry is the lead-byte marker plus the
|
| +// 0x80 continuation markers, shifted to where they end up after accumulation,
|
| +// e.g. 0x00003080 == (0xC0 << 6) + 0x80 for a 2-byte sequence and
|
| +// 0x000E2080 == (0xE0 << 12) + (0x80 << 6) + 0x80 for a 3-byte sequence.
|
| +// The last two entries belong to 5- and 6-byte forms, which isLegalUTF8() rejects.
|
| +static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, static_cast<UChar32>(0xFA082080UL), static_cast<UChar32>(0x82082080UL) };
|
| +
|
| +// Decodes the UTF-8 sequence of |length| bytes that starts at |sequence| and
|
| +// returns its code point. |sequence| is advanced past the bytes consumed.
|
| +// |length| is expected to be in 1..6 (it indexes offsetsFromUTF8); the bytes
|
| +// are not validated here.
|
| +static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length)
|
| +{
|
| +    // Lengths outside 1..6 consume no input.
|
| +    const unsigned byteCount = length <= 6 ? length : 0;
|
| +
|
| +    UChar32 accumulated = 0;
|
| +    for (unsigned index = 0; index < byteCount; ++index) {
|
| +        // Make room for six more payload bits before every byte but the first.
|
| +        if (index)
|
| +            accumulated <<= 6;
|
| +        accumulated += static_cast<unsigned char>(*sequence++);
|
| +    }
|
| +
|
| +    // The bytes were summed with their marker bits still set; remove them.
|
| +    return accumulated - offsetsFromUTF8[length - 1];
|
| +}
|
| +
|
| +// Converts UTF-8 in [*sourceStart, sourceEnd) to UTF-16 in [*targetStart, targetEnd).
|
| +//
|
| +// sourceStart    in/out: on return points at the first byte that was not
|
| +//                consumed (the offending sequence when conversion stops early).
|
| +// targetStart    in/out: on return points one past the last UChar written.
|
| +// sourceAllASCII optional out: set to true when no written UTF-16 unit had a
|
| +//                bit set above 0x7f (also true when nothing was written).
|
| +// strict         when true, a decoded surrogate or out-of-range value stops the
|
| +//                conversion with sourceIllegal; when false it is replaced by
|
| +//                replacementCharacter. Malformed byte sequences are rejected
|
| +//                with sourceIllegal in both modes.
|
| +//
|
| +// Returns conversionOK, sourceExhausted (truncated trailing sequence),
|
| +// sourceIllegal or targetExhausted (output buffer too small).
|
| +ConversionResult convertUTF8ToUTF16(
|
| + const char** sourceStart, const char* sourceEnd,
|
| + UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)
|
| +{
|
| + ConversionResult result = conversionOK;
|
| + const char* source = *sourceStart;
|
| + UChar* target = *targetStart;
|
| + // OR of every UTF-16 unit written; used at the end for the all-ASCII answer.
|
| + UChar orAllData = 0;
|
| + while (source < sourceEnd) {
|
| + // NOTE(review): inlineUTF8SequenceLength is defined outside this hunk;
|
| + // presumably it derives the sequence length from the lead byte.
|
| + int utf8SequenceLength = inlineUTF8SequenceLength(*source);
|
| + if (sourceEnd - source < utf8SequenceLength) {
|
| + result = sourceExhausted;
|
| + break;
|
| + }
|
| + // Do this check whether lenient or strict
|
| + if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength)) {
|
| + result = sourceIllegal;
|
| + break;
|
| + }
|
| +
|
| + // This advances |source| past the sequence; the early exits below undo that.
|
| + UChar32 character = readUTF8Sequence(source, utf8SequenceLength);
|
| +
|
| + if (target >= targetEnd) {
|
| + source -= utf8SequenceLength; // Back up source pointer!
|
| + result = targetExhausted;
|
| + break;
|
| + }
|
| +
|
| + if (U_IS_BMP(character)) {
|
| + // UTF-16 surrogate values are illegal in UTF-32
|
| + if (U_IS_SURROGATE(character)) {
|
| + if (strict) {
|
| + source -= utf8SequenceLength; // return to the illegal value itself
|
| + result = sourceIllegal;
|
| + break;
|
| + }
|
| + *target++ = replacementCharacter;
|
| + orAllData |= replacementCharacter;
|
| + } else {
|
| + *target++ = static_cast<UChar>(character); // normal case
|
| + orAllData |= character;
|
| + }
|
| + } else if (U_IS_SUPPLEMENTARY(character)) {
|
| + // character is in range 0x10000 - 0x10FFFF and needs a surrogate pair,
|
| + // i.e. two free slots in the target buffer.
|
| + if (target + 1 >= targetEnd) {
|
| + source -= utf8SequenceLength; // Back up source pointer!
|
| + result = targetExhausted;
|
| + break;
|
| + }
|
| + *target++ = U16_LEAD(character);
|
| + *target++ = U16_TRAIL(character);
|
| + // Surrogates are never ASCII, so mark every bit as seen.
|
| + orAllData = 0xffff;
|
| + } else {
|
| + // Neither BMP nor supplementary: the value is not a valid code point.
|
| + if (strict) {
|
| + source -= utf8SequenceLength; // return to the start
|
| + result = sourceIllegal;
|
| + break; // Bail out; shouldn't continue
|
| + } else {
|
| + *target++ = replacementCharacter;
|
| + orAllData |= replacementCharacter;
|
| + }
|
| + }
|
| + }
|
| + // Report progress to the caller even when the loop stopped early.
|
| + *sourceStart = source;
|
| + *targetStart = target;
|
| +
|
| + if (sourceAllASCII)
|
| + *sourceAllASCII = !(orAllData & ~0x7f);
|
| +
|
| + return result;
|
| +}
|
| +
|
| // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
|
| static inline void putUTF8Triple(char*& buffer, UChar ch)
|
| {
|
| @@ -283,6 +486,23 @@ static inline void putUTF8Triple(char*& buffer, UChar ch)
|
| *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
|
| }
|
|
|
| +// Builds a String16 from the |length| bytes of UTF-8 starting at |stringStart|.
|
| +// Returns an empty String16 when the input is null or empty, or when the strict
|
| +// UTF-8 to UTF-16 conversion does not finish with conversionOK.
|
| +String16 String16::fromUTF8(const char* stringStart, size_t length)
|
| +{
|
| +    if (!stringStart || !length)
|
| +        return String16();
|
| +
|
| +    // One UTF-16 unit per input byte is always enough: sequences of up to three
|
| +    // bytes produce one unit and four-byte sequences produce two.
|
| +    std::vector<UChar> utf16(length);
|
| +    UChar* const utf16Begin = utf16.data();
|
| +    UChar* utf16Cursor = utf16Begin;
|
| +    const char* utf8Cursor = stringStart;
|
| +
|
| +    const ConversionResult status = convertUTF8ToUTF16(&utf8Cursor, stringStart + length, &utf16Cursor, utf16Begin + utf16.size(), 0, true);
|
| +    if (status != conversionOK)
|
| +        return String16();
|
| +
|
| +    // Only the units actually written make up the result.
|
| +    unsigned utf16Length = utf16Cursor - utf16Begin;
|
| +    return String16(utf16Begin, utf16Length);
|
| +}
|
| +
|
| // trim from start
|
| static inline wstring <rim(wstring &s)
|
| {
|
| @@ -303,6 +523,22 @@ static inline wstring &trim(wstring &s)
|
| return ltrim(rtrim(s));
|
| }
|
|
|
| +// static
|
| +// Returns |i| rendered as decimal text by the "%d" printf conversion.
|
| +std::string String16::intToString(int i)
|
| +{
|
| +    // 50 bytes is far more than "%d" can produce for an int, terminator included.
|
| +    char text[50];
|
| +    const int written = std::sprintf(text, "%d", i);
|
| +    return std::string(text, written);
|
| +}
|
| +
|
| +// static
|
| +// Returns |d| rendered by the "%f" printf conversion (fixed notation with six
|
| +// fractional digits). Returns an empty string if formatting itself fails.
|
| +//
|
| +// "%f" prints every integer digit, so a large magnitude such as DBL_MAX needs
|
| +// more than 300 characters. The output is therefore measured with snprintf and
|
| +// a heap buffer of the exact size is used whenever it does not fit the small
|
| +// stack buffer, instead of writing unbounded into a fixed array.
|
| +// NOTE(review): "%f" uses the current locale's decimal-point character; confirm
|
| +// callers never run under a locale that changes it.
|
| +std::string String16::doubleToString(double d)
|
| +{
|
| +    char stackBuffer[64];
|
| +    const int length = std::snprintf(stackBuffer, sizeof(stackBuffer), "%f", d);
|
| +    if (length < 0)
|
| +        return std::string(); // Formatting failed; nothing usable was produced.
|
| +
|
| +    // snprintf reports the full length required, excluding the terminator.
|
| +    const size_t needed = static_cast<size_t>(length);
|
| +    if (needed < sizeof(stackBuffer))
|
| +        return std::string(stackBuffer, needed);
|
| +
|
| +    // The text was truncated: format again into a buffer of the exact size.
|
| +    std::vector<char> heapBuffer(needed + 1);
|
| +    std::snprintf(heapBuffer.data(), heapBuffer.size(), "%f", d);
|
| +    return std::string(heapBuffer.data(), needed);
|
| +}
|
| +
|
| std::string String16::utf8() const
|
| {
|
| unsigned length = this->length();
|
|
|