| Index: third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| diff --git a/third_party/WebKit/Source/wtf/text/UTF8.cpp b/third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| index 79abd8c059b1fff01efdbd5030981cb3bfa04032..9a5f2ce598c7a52a5490adb4af1cfe796a0284dc 100644
|
| --- a/third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| +++ b/third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| @@ -55,13 +55,13 @@ inline int inlineUTF8SequenceLength(char b0)
|
| // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
|
| // into the first byte, depending on how many bytes follow. There are
|
| // as many entries in this table as there are UTF-8 sequence types.
|
| -// (I.e., one byte sequence, two byte... etc.). Remember that sequencs
|
| +// (I.e., one byte sequence, two byte... etc.). Remember that sequences
|
| // for *legal* UTF-8 will be 4 or fewer bytes total.
|
| static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
|
|
| ConversionResult convertLatin1ToUTF8(
|
| - const LChar** sourceStart, const LChar* sourceEnd,
|
| - char** targetStart, char* targetEnd)
|
| + const LChar** sourceStart, const LChar* sourceEnd,
|
| + char** targetStart, char* targetEnd)
|
| {
|
| ConversionResult result = conversionOK;
|
| const LChar* source = *sourceStart;
|
| @@ -164,10 +164,17 @@ ConversionResult convertUTF16ToUTF8(
|
| break;
|
| }
|
| switch (bytesToWrite) { // note: everything falls through.
|
| - case 4: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6;
|
| - case 3: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6;
|
| - case 2: *--target = (char)((ch | byteMark) & byteMask); ch >>= 6;
|
| - case 1: *--target = (char)(ch | firstByteMark[bytesToWrite]);
|
| + case 4:
|
| + *--target = (char)((ch | byteMark) & byteMask);
|
| + ch >>= 6;
|
| + case 3:
|
| + *--target = (char)((ch | byteMark) & byteMask);
|
| + ch >>= 6;
|
| + case 2:
|
| + *--target = (char)((ch | byteMark) & byteMask);
|
| + ch >>= 6;
|
| + case 1:
|
| + *--target = (char)(ch | firstByteMark[bytesToWrite]);
|
| }
|
| target += bytesToWrite;
|
| }
|
| @@ -184,22 +191,45 @@ static bool isLegalUTF8(const unsigned char* source, int length)
|
| unsigned char a;
|
| const unsigned char* srcptr = source + length;
|
| switch (length) {
|
| - default: return false;
|
| - // Everything else falls through when "true"...
|
| - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
| - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
| - case 2: if ((a = (*--srcptr)) > 0xBF) return false;
|
| + default:
|
| + return false;
|
| + // Everything else falls through when "true"...
|
| + case 4:
|
| + if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
|
| + return false;
|
| + case 3:
|
| + if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
|
| + return false;
|
| + case 2:
|
| + if ((a = (*--srcptr)) > 0xBF)
|
| + return false;
|
|
|
| + // no fall-through in this inner switch
|
| switch (*source) {
|
| - // no fall-through in this inner switch
|
| - case 0xE0: if (a < 0xA0) return false; break;
|
| - case 0xED: if (a > 0x9F) return false; break;
|
| - case 0xF0: if (a < 0x90) return false; break;
|
| - case 0xF4: if (a > 0x8F) return false; break;
|
| - default: if (a < 0x80) return false;
|
| + case 0xE0:
|
| + if (a < 0xA0)
|
| + return false;
|
| + break;
|
| + case 0xED:
|
| + if (a > 0x9F)
|
| + return false;
|
| + break;
|
| + case 0xF0:
|
| + if (a < 0x90)
|
| + return false;
|
| + break;
|
| + case 0xF4:
|
| + if (a > 0x8F)
|
| + return false;
|
| + break;
|
| + default:
|
| + if (a < 0x80)
|
| + return false;
|
| }
|
|
|
| - case 1: if (*source >= 0x80 && *source < 0xC2) return false;
|
| + case 1:
|
| + if (*source >= 0x80 && *source < 0xC2)
|
| + return false;
|
| }
|
| if (*source > 0xF4)
|
| return false;
|
| @@ -217,12 +247,23 @@ static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length)
|
|
|
| // The cases all fall through.
|
| switch (length) {
|
| - case 6: character += static_cast<unsigned char>(*sequence++); character <<= 6;
|
| - case 5: character += static_cast<unsigned char>(*sequence++); character <<= 6;
|
| - case 4: character += static_cast<unsigned char>(*sequence++); character <<= 6;
|
| - case 3: character += static_cast<unsigned char>(*sequence++); character <<= 6;
|
| - case 2: character += static_cast<unsigned char>(*sequence++); character <<= 6;
|
| - case 1: character += static_cast<unsigned char>(*sequence++);
|
| + case 6:
|
| + character += static_cast<unsigned char>(*sequence++);
|
| + character <<= 6;
|
| + case 5:
|
| + character += static_cast<unsigned char>(*sequence++);
|
| + character <<= 6;
|
| + case 4:
|
| + character += static_cast<unsigned char>(*sequence++);
|
| + character <<= 6;
|
| + case 3:
|
| + character += static_cast<unsigned char>(*sequence++);
|
| + character <<= 6;
|
| + case 2:
|
| + character += static_cast<unsigned char>(*sequence++);
|
| + character <<= 6;
|
| + case 1:
|
| + character += static_cast<unsigned char>(*sequence++);
|
| }
|
|
|
| return character - offsetsFromUTF8[length - 1];
|
| @@ -263,10 +304,9 @@ ConversionResult convertUTF8ToUTF16(
|
| source -= utf8SequenceLength; // return to the illegal value itself
|
| result = sourceIllegal;
|
| break;
|
| - } else {
|
| - *target++ = replacementCharacter;
|
| - orAllData |= replacementCharacter;
|
| }
|
| + *target++ = replacementCharacter;
|
| + orAllData |= replacementCharacter;
|
| } else {
|
| *target++ = static_cast<UChar>(character); // normal case
|
| orAllData |= character;
|
| @@ -326,8 +366,9 @@ unsigned calculateStringHashAndLengthFromUTF8MaskingTop8Bits(const char* data, c
|
| if (!data[i])
|
| return 0;
|
| }
|
| - } else if (dataEnd - data < utf8SequenceLength)
|
| + } else if (dataEnd - data < utf8SequenceLength) {
|
| return 0;
|
| + }
|
|
|
| if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength))
|
| return 0;
|
| @@ -342,11 +383,11 @@ unsigned calculateStringHashAndLengthFromUTF8MaskingTop8Bits(const char* data, c
|
| stringHasher.addCharacter(static_cast<UChar>(character)); // normal case
|
| utf16Length++;
|
| } else if (U_IS_SUPPLEMENTARY(character)) {
|
| - stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)),
|
| - static_cast<UChar>(U16_TRAIL(character)));
|
| + stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)), static_cast<UChar>(U16_TRAIL(character)));
|
| utf16Length += 2;
|
| - } else
|
| + } else {
|
| return 0;
|
| + }
|
| }
|
|
|
| return stringHasher.hashWithTop8BitsMasked();
|
| @@ -384,8 +425,9 @@ ALWAYS_INLINE bool equalWithUTF8Internal(const CharType* a, const CharType* aEnd
|
| return false;
|
| if (*a++ != U16_TRAIL(character))
|
| return false;
|
| - } else
|
| + } else {
|
| return false;
|
| + }
|
| }
|
|
|
| return a == aEnd;
|
|
|