Chromium Code Reviews| Index: third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp |
| diff --git a/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp b/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp |
| index 9d7a9d684a61c5ab985addfc70b0813c09a4f7a5..a7ac0626a1297307dacd94bcda47d53f44b8eb52 100644 |
| --- a/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp |
| +++ b/third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp |
| @@ -6,9 +6,12 @@ |
| #include <algorithm> |
| #include <cctype> |
| +#include <cstdio> |
| #include <functional> |
| #include <locale> |
| +#define DCHECK(k) |
|
dgozman
2016/06/08 15:56:56
Not needed, as you have it in Platform.h
|
| + |
| namespace blink { |
| namespace protocol { |
| @@ -274,6 +277,206 @@ ConversionResult convertUTF16ToUTF8( |
| return result; |
| } |
| +/** |
| + * Is this code point a BMP code point (U+0000..U+ffff)? |
| + * @param c 32-bit code point |
| + * @return TRUE or FALSE |
| + * @stable ICU 2.8 |
| + */ |
| +#define U_IS_BMP(c) ((uint32_t)(c) <= 0xffff) |
| + |
| +/** |
| + * Is this code point a supplementary code point (U+10000..U+10ffff)? |
| + * The difference is compared as uint32_t, so a value below 0x10000 wraps |
| + * around to a large number and fails the single range test. |
| + * @param c 32-bit code point |
| + * @return TRUE or FALSE |
| + * @stable ICU 2.8 |
| + */ |
| +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c) - 0x10000) <= 0xfffff) |
| + |
| +/** |
| + * Is this code point a surrogate (U+d800..U+dfff)? |
| + * The mask 0xfffff800 keeps every bit above the low 11, so only values in |
| + * 0xd800..0xdfff compare equal to 0xd800. |
| + * @param c 32-bit code point |
| + * @return TRUE or FALSE |
| + * @stable ICU 2.4 |
| + */ |
| +#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800) |
| + |
| +/** |
| + * Get the lead surrogate (0xd800..0xdbff) for a |
| + * supplementary code point (0x10000..0x10ffff). |
| + * The constant 0xd7c0 equals 0xd800 - (0x10000 >> 10), which folds the |
| + * subtraction of the 0x10000 offset into the addition. |
| + * @param supplementary 32-bit code point (U+10000..U+10ffff) |
| + * @return lead surrogate (U+d800..U+dbff) for supplementary |
| + * @stable ICU 2.4 |
| + */ |
| +#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0) |
| + |
| +/** |
| + * Get the trail surrogate (0xdc00..0xdfff) for a |
| + * supplementary code point (0x10000..0x10ffff). |
| + * The low 10 bits of the code point are OR-ed into 0xdc00. |
| + * @param supplementary 32-bit code point (U+10000..U+10ffff) |
| + * @return trail surrogate (U+dc00..U+dfff) for supplementary |
| + * @stable ICU 2.4 |
| + */ |
| +#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00) |
| + |
| +// Validates a single UTF-8 sequence of the given length starting at source. |
| +// This must be called with the length pre-determined by the first byte. |
| +// Returns false for any length outside 1..4 (the Unicode definition of |
| +// UTF-8 goes up to 4-byte sequences), for a trailing byte outside |
| +// 0x80..0xBF, for a second byte outside the narrower range required after |
| +// the lead bytes 0xE0, 0xED, 0xF0 and 0xF4, and for a lead byte in |
| +// 0x80..0xC1 or above 0xF4. Returns true otherwise. |
| +static bool isLegalUTF8(const unsigned char* source, int length) |
| +{ |
| +    unsigned char a; |
| +    // Bytes are checked from the last one back towards the lead byte. |
| +    const unsigned char* srcptr = source + length; |
| +    switch (length) { |
| +    default: |
| +        return false; |
| +    // Everything else falls through when "true"... |
| +    case 4: |
| +        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) |
| +            return false; |
| +        // fall through |
| +    case 3: |
| +        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) |
| +            return false; |
| +        // fall through |
| +    case 2: |
| +        // The lower bound has to be enforced here as well: the 0xED and 0xF4 |
| +        // branches below only test an upper bound, so without it a sequence |
| +        // such as ED 41 80 would be reported as legal. |
| +        if ((a = (*--srcptr)) < 0x80 || a > 0xBF) |
| +            return false; |
| + |
| +        // no fall-through in this inner switch |
| +        switch (*source) { |
| +        case 0xE0: |
| +            if (a < 0xA0) |
| +                return false; |
| +            break; |
| +        case 0xED: |
| +            if (a > 0x9F) |
| +                return false; |
| +            break; |
| +        case 0xF0: |
| +            if (a < 0x90) |
| +                return false; |
| +            break; |
| +        case 0xF4: |
| +            if (a > 0x8F) |
| +                return false; |
| +            break; |
| +        default: |
| +            // 0x80..0xBF was already verified above. |
| +            break; |
| +        } |
| +        // fall through |
| +    case 1: |
| +        if (*source >= 0x80 && *source < 0xC2) |
| +            return false; |
| +    } |
| +    if (*source > 0xF4) |
| +        return false; |
| +    return true; |
| +} |
| + |
| +// Magic values subtracted from a buffer value during UTF8 conversion: readUTF8Sequence() subtracts the entry at index (length - 1). |
| +// This table contains as many values as there might be trailing bytes in a UTF-8 sequence (zero through five). |
| +// Each entry is the sum of the lead-byte and trailing-byte marker bits after the shifts, e.g. 0x3080 == (0xC0 << 6) + 0x80. |
| +static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, static_cast<UChar32>(0xFA082080UL), static_cast<UChar32>(0x82082080UL) }; |
| + |
| +// Accumulates length bytes from sequence, shifting the running value left by |
| +// six bits between consecutive bytes, and returns that value minus |
| +// offsetsFromUTF8[length - 1]. sequence is advanced past the bytes read. |
| +// Only lengths 1..6 consume any input. |
| +static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length) |
| +{ |
| +    UChar32 accumulated = 0; |
| + |
| +    const unsigned bytesToRead = length <= 6 ? length : 0; |
| +    for (unsigned remaining = bytesToRead; remaining > 0; --remaining) { |
| +        accumulated += static_cast<unsigned char>(*sequence++); |
| +        // Every byte except the last makes room for the next six bits. |
| +        if (remaining > 1) |
| +            accumulated <<= 6; |
| +    } |
| + |
| +    return accumulated - offsetsFromUTF8[length - 1]; |
| +} |
| + |
| +// Converts UTF-8 in [*sourceStart, sourceEnd) to UTF-16 in [*targetStart, targetEnd). |
| +// On return *sourceStart and *targetStart point just past the last sequence that |
| +// was converted. Returns sourceExhausted when the input ends inside a sequence, |
| +// sourceIllegal for a sequence isLegalUTF8() rejects (or, when strict, one that |
| +// decodes to a surrogate or to a value that is neither BMP nor supplementary), |
| +// targetExhausted when the output has no room for the next character, and |
| +// conversionOK otherwise. When not strict, such values are written as |
| +// replacementCharacter. If sourceAllASCII is non-null it receives whether every |
| +// unit written was below 0x80. |
| +ConversionResult convertUTF8ToUTF16( |
| +    const char** sourceStart, const char* sourceEnd, |
| +    UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict) |
| +{ |
| +    ConversionResult status = conversionOK; |
| +    const char* in = *sourceStart; |
| +    UChar* out = *targetStart; |
| +    UChar seenBits = 0; |
| + |
| +    while (in < sourceEnd) { |
| +        const int sequenceLength = inlineUTF8SequenceLength(*in); |
| +        if (sourceEnd - in < sequenceLength) { |
| +            status = sourceExhausted; |
| +            break; |
| +        } |
| +        // Legality is verified in both lenient and strict mode. |
| +        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(in), sequenceLength)) { |
| +            status = sourceIllegal; |
| +            break; |
| +        } |
| + |
| +        // Remember where this sequence began so the input can be rewound. |
| +        const char* const sequenceBegin = in; |
| +        const UChar32 codePoint = readUTF8Sequence(in, sequenceLength); |
| + |
| +        if (out >= targetEnd) { |
| +            in = sequenceBegin; |
| +            status = targetExhausted; |
| +            break; |
| +        } |
| + |
| +        if (U_IS_SUPPLEMENTARY(codePoint)) { |
| +            // Code points in 0x10000..0x10FFFF need two UTF-16 units. |
| +            if (out + 1 >= targetEnd) { |
| +                in = sequenceBegin; |
| +                status = targetExhausted; |
| +                break; |
| +            } |
| +            *out++ = U16_LEAD(codePoint); |
| +            *out++ = U16_TRAIL(codePoint); |
| +            seenBits = 0xffff; |
| +            continue; |
| +        } |
| + |
| +        if (U_IS_BMP(codePoint) && !U_IS_SURROGATE(codePoint)) { |
| +            *out++ = static_cast<UChar>(codePoint); // normal case |
| +            seenBits |= codePoint; |
| +            continue; |
| +        } |
| + |
| +        // What remains is a surrogate value or a value outside both ranges. |
| +        if (strict) { |
| +            in = sequenceBegin; // report the offending sequence itself |
| +            status = sourceIllegal; |
| +            break; |
| +        } |
| +        *out++ = replacementCharacter; |
| +        seenBits |= replacementCharacter; |
| +    } |
| + |
| +    *sourceStart = in; |
| +    *targetStart = out; |
| + |
| +    if (sourceAllASCII) |
| +        *sourceAllASCII = !(seenBits & ~0x7f); |
| + |
| +    return status; |
| +} |
| + |
| // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. |
| static inline void putUTF8Triple(char*& buffer, UChar ch) |
| { |
| @@ -283,6 +486,23 @@ static inline void putUTF8Triple(char*& buffer, UChar ch) |
| *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
| } |
| +// Builds a String16 from length bytes of UTF-8 starting at stringStart. |
| +// Returns an empty String16 when stringStart is null, when length is zero, |
| +// or when the strict conversion reports anything other than conversionOK. |
| +String16 String16::fromUTF8(const char* stringStart, size_t length) |
| +{ |
| +    if (!stringStart || !length) |
| +        return String16(); |
| + |
| +    // The output buffer holds one UChar per input byte. |
| +    std::vector<UChar> utf16(length); |
| +    UChar* const utf16Begin = utf16.data(); |
| +    UChar* const utf16Limit = utf16Begin + utf16.size(); |
| +    UChar* utf16Cursor = utf16Begin; |
| + |
| +    const char* utf8Cursor = stringStart; |
| +    const char* const utf8Limit = stringStart + length; |
| + |
| +    const ConversionResult status = convertUTF8ToUTF16(&utf8Cursor, utf8Limit, &utf16Cursor, utf16Limit, nullptr, true); |
| +    if (status != conversionOK) |
| +        return String16(); |
| + |
| +    unsigned utf16Length = utf16Cursor - utf16Begin; |
| +    return String16(utf16Begin, utf16Length); |
| +} |
| + |
| // trim from start |
| static inline wstring &ltrim(wstring &s) |
| { |
| @@ -303,6 +523,22 @@ static inline wstring &trim(wstring &s) |
| return ltrim(rtrim(s)); |
| } |
| +// static |
| +// Formats i with the "%d" conversion and returns the resulting text. |
| +std::string String16::intToString(int i) |
| +{ |
| +    char text[50]; |
| +    std::sprintf(text, "%d", i); |
| +    std::string result(text); |
| +    return result; |
| +} |
| + |
| +// static |
| +// Formats d with the "%f" conversion and returns the resulting text. |
| +// "%f" never switches to exponent notation, so a large magnitude such as |
| +// DBL_MAX produces more than 300 characters. The output is therefore sized |
| +// from the length snprintf reports instead of assuming 100 bytes suffice. |
| +// Returns an empty string if snprintf reports an error. |
| +std::string String16::doubleToString(double d) |
| +{ |
| +    char stackBuffer[100]; |
| +    const int length = std::snprintf(stackBuffer, sizeof(stackBuffer), "%f", d); |
| +    if (length < 0) |
| +        return std::string(); |
| +    if (static_cast<size_t>(length) < sizeof(stackBuffer)) |
| +        return std::string(stackBuffer, length); |
| + |
| +    // The value did not fit: format again into storage of the reported size |
| +    // plus one byte for the terminator that snprintf writes. |
| +    std::string result(static_cast<size_t>(length) + 1, '\0'); |
| +    std::snprintf(&result[0], result.size(), "%f", d); |
| +    result.resize(length); |
| +    return result; |
| +} |
| + |
| std::string String16::utf8() const |
| { |
| unsigned length = this->length(); |